diff --git a/.circleci/config.yml b/.circleci/config.yml index a17560d..4174b62 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,6 +1,6 @@ version: 2.1 orbs: - codecov: codecov/codecov@3.3.0 + codecov: codecov/codecov@5.0.3 jobs: test: @@ -21,13 +21,21 @@ jobs: paths: - "~/.cache/pip" - run: pip3 install --upgrade pip - - run: make install deps-test-ubuntu PIP_INSTALL="pip3 install" - - run: make coverage - - codecov/upload + - run: + name: install dependencies and package + command: make deps-test-ubuntu install PIP_INSTALL="pip3 install" - save_cache: key: v01-pydeps-<< parameters.python-image >>-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} paths: - "~/.cache/pip" + - run: + name: run regression test and coverage test + command: make coverage + no_output_timeout: 30m + - codecov/upload + - store_artifacts: + path: htmlcov + resource_class: large workflows: build: diff --git a/LICENSE b/LICENSE index bc7973a..99fbb49 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,676 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright 2018-2020 Konstantin Baierer, Mike Gerber - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright 2020 The tfaip Authors. All rights reserved. + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. 
+ + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. 
+States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. 
+ + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. 
+ + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. 
+ + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/Makefile b/Makefile index cfa7cd8..356d426 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,12 @@ export # export variables to subshells PIP_INSTALL = pip3 install GIT_CLONE = git clone PYTHON = python3 -PYTEST_ARGS = -W 'ignore::DeprecationWarning' -W 'ignore::FutureWarning' -MODEL = qurator-gt4histocr-1.0 +PYTEST_ARGS = -W 'ignore::DeprecationWarning' -W 'ignore::FutureWarning' -vv +# not usable with Calamari 2 ATM - see Calamari#362 +#MODEL = qurator-gt4histocr-1.0 # cannot be migrated to Calamari 2 +#MODEL = deep3_fraktur19 # too large for CI +MODEL = fraktur_19th_century +export MODEL # needed for pytest model selection EXAMPLE = actevedef_718448162.first-page+binarization+segmentation # BEGIN-EVAL makefile-parser --make-help Makefile @@ -82,12 +86,13 @@ assets-clean: # Run unit tests test: test/assets $(MODEL) # declare -p HTTP_PROXY - $(PYTHON) -m pytest --continue-on-collection-errors test $(PYTEST_ARGS) + $(PYTHON) -m pytest --continue-on-collection-errors --durations=0 test $(PYTEST_ARGS) # Run unit tests and determine test coverage coverage: test/assets $(MODEL) coverage erase make test PYTHON="coverage run" + coverage combine coverage report coverage html diff --git a/ocrd_calamari/config.py b/ocrd_calamari/config.py deleted file mode 100644 index 1729f8c..0000000 --- a/ocrd_calamari/config.py +++ /dev/null @@ -1,5 +0,0 @@ -import json - -from pkg_resources import resource_string - -OCRD_TOOL = json.loads(resource_string(__name__, "ocrd-tool.json").decode("utf8")) diff --git a/ocrd_calamari/fix_calamari1_model.py b/ocrd_calamari/fix_calamari1_model.py deleted file mode 100644 index 4989594..0000000 --- a/ocrd_calamari/fix_calamari1_model.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import re -from copy import deepcopy -from glob import glob - -import click - -from ocrd_calamari.util import working_directory - - -@click.command -@click.argument("checkpoint_dir") -def fix_calamari1_model(checkpoint_dir): - """ - Fix old Calamari 1 models. 
- - This currently means fixing regexen in "replacements" to have their global flags - in front of the rest of the regex. - """ - with working_directory(checkpoint_dir): - for fn in glob("*.json"): - with open(fn, "r") as fp: - j = json.load(fp) - old_j = deepcopy(j) - - for v in j["model"].values(): - if not isinstance(v, dict): - continue - for child in v.get("children", []): - for replacement in child.get("replacements", []): - # Move global flags in front - replacement["old"] = re.sub( - r"^(.*)\(\?u\)$", r"(?u)\1", replacement["old"] - ) - - if j == old_j: - print(f"{fn} unchanged.") - else: - with open(fn, "w") as fp: - json.dump(j, fp, indent=2) - print(f"{fn} fixed.") diff --git a/ocrd_calamari/ocrd-tool.json b/ocrd_calamari/ocrd-tool.json index 6bdb971..3a3a6b7 100644 --- a/ocrd_calamari/ocrd-tool.json +++ b/ocrd_calamari/ocrd-tool.json @@ -1,6 +1,6 @@ { "git_url": "https://github.com/OCR-D/ocrd_calamari", - "version": "1.0.6", + "version": "2.0.0", "tools": { "ocrd-calamari-recognize": { "executable": "ocrd-calamari-recognize", @@ -11,24 +11,28 @@ "recognition/text-recognition" ], "description": "Recognize lines with Calamari", - "input_file_grp": [ - "OCR-D-SEG-LINE" - ], - "output_file_grp": [ - "OCR-D-OCR-CALAMARI" - ], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, "parameters": { + "device": { + "description": "Select computing device for Tensorflow (-1 for CPU, 0 for first CUDA GPU etc.). Downgraded to CPU if not available.", + "type": "number", + "format": "integer", + "default": 0 + }, "checkpoint_dir": { - "description": "The directory containing calamari model files (*.ckpt.json). Uses all checkpoints in that directory", + "description": "The directory (name or path) containing Calamari model files (*.ckpt.json). 
Will use all checkpoints in that directory.", "type": "string", "format": "uri", "content-type": "text/directory", "cacheable": true, - "default": "qurator-gt4histocr-1.0" + "required": true }, "voter": { "description": "The voting algorithm to use", - "type": "string", "default": "confidence_voter_default_ctc" + "type": "string", + "default": "confidence_voter_default_ctc", + "enum": ["confidence_voter_default_ctc", "sequence_voter"] }, "textequiv_level": { "type": "string", @@ -50,107 +54,161 @@ "name": "qurator-gt4histocr-1.0", "description": "Calamari model trained with GT4HistOCR", "size": 90275264, - "version_range": ">= 1.0.0" - }, - { - "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_fraktur19-1.tar.gz", - "type": "archive", - "name": "zpd-fraktur19", - "description": "Model trained on 19th century german fraktur", - "path_in_archive": "c1_fraktur19-1", - "size": 86009886, - "version_range": ">= 1.0.0" + "version_range": ">= 1.0.0, < 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_latin-script-hist-3.tar.gz", - "type": "archive", - "name": "zpd-latin-script-hist-3", - "path_in_archive": "c1_latin-script-hist-3", - "description": "Model trained on historical latin-script texts", - "size": 88416863, - "version_range": ">= 1.0.0" - }, - { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/antiqua_historical.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/antiqua_historical.tar.gz", "type": "archive", "name": "antiqua_historical", "path_in_archive": "antiqua_historical", "description": "Antiqua parts of GT4HistOCR from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", - "size": 89615540, - "version_range": ">= 1.0.0" + "size": 30633860, + "version_range": ">= 2.0.0" }, { - "url": 
"https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/antiqua_historical_ligs.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/antiqua_historical_ligs.tar.gz", "type": "archive", "name": "antiqua_historical_ligs", "path_in_archive": "antiqua_historical_ligs", "description": "Antiqua parts of GT4HistOCR with enriched ligatures from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", - "size": 87540762, - "version_range": ">= 1.0.0" + "size": 30368081, + "version_range": ">= 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/fraktur_19th_century.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/fraktur_19th_century.tar.gz", "type": "archive", "name": "fraktur_19th_century", "path_in_archive": "fraktur_19th_century", "description": "Fraktur 19th century parts of GT4HistOCR mixed with Fraktur data from Archiscribe and jze from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale and nlbin, NFC)", - "size": 83895140, - "version_range": ">= 1.0.0" + "size": 30018408, + "version_range": ">= 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/fraktur_historical.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/fraktur_historical.tar.gz", "type": "archive", "name": "fraktur_historical", "path_in_archive": "fraktur_historical", "description": "Fraktur parts of GT4HistOCR from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", - "size": 87807639, - "version_range": ">= 1.0.0" + "size": 30232783, + "version_range": ">= 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/fraktur_historical_ligs.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/fraktur_historical_ligs.tar.gz", "type": "archive", "name": "fraktur_historical_ligs", 
"path_in_archive": "fraktur_historical_ligs", "description": "Fraktur parts of GT4HistOCR with enriched ligatures from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", - "size": 88039551, - "version_range": ">= 1.0.0" + "size": 30622320, + "version_range": ">= 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/gt4histocr.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/gt4histocr.tar.gz", "type": "archive", "name": "gt4histocr", "path_in_archive": "gt4histocr", "description": "GT4HistOCR from Calamari-OCR/calamari_models (5-fold ensemble, normalized grayscale, NFC)", - "size": 90107851, - "version_range": ">= 1.0.0" + "size": 31159925, + "version_range": ">= 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/historical_french.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/historical_french.tar.gz", "type": "archive", "name": "historical_french", "path_in_archive": "historical_french", "description": "17-19th century French prints from Calamari-OCR/calamari_models (5-fold ensemble, nlbin, NFC)", - "size": 87335250, - "version_range": ">= 1.0.0" + "size": 30257128, + "version_range": ">= 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/idiotikon.zip", + "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/idiotikon.tar.gz", "type": "archive", "name": "idiotikon", "path_in_archive": "idiotikon", "description": "Antiqua UW3 finetuned on Antiqua Idiotikon dictionary with many diacritics from Calamari-OCR/calamari_models (5-fold ensemble, nlbin, NFD)", - "size": 100807764, - "version_range": ">= 1.0.0" + "size": 30474541, + "version_range": ">= 2.0.0" }, { - "url": "https://github.com/Calamari-OCR/calamari_models/releases/download/1.1/uw3-modern-english.zip", + "url": 
"https://github.com/Calamari-OCR/calamari_models/releases/download/2.2/uw3-modern-english.tar.gz", "type": "archive", "name": "uw3-modern-english", "path_in_archive": "uw3-modern-english", "description": "Antiqua UW3 corpus from Calamari-OCR/calamari_models (5-fold ensemble, nlbin, NFC)", - "size": 85413520, - "version_range": ">= 1.0.0" - } + "size": 29897592, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/deep3_fraktur19.tar.gz", + "type": "archive", + "name": "deep3_fraktur19", + "path_in_archive": "deep3_fraktur19", + "description": "Model trained on 19th century German Fraktur, like zpd-fraktur19 but deeper (5-fold ensemble, nlbin, NFC) - val_CER=0.5%", + "size": 92555871, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/deep3_fraktur-hist.tar.gz", + "type": "archive", + "name": "deep3_fraktur-hist", + "path_in_archive": "deep3_fraktur-hist", + "description": "Model trained on 16th to 19th century German Fraktur, like fraktur-historical but deeper (5-fold ensemble, nlbin, NFC) - val_CER=0.9%", + "size": 92504515, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/deep3_antiqua-hist.tar.gz", + "type": "archive", + "name": "deep3_antiqua-hist", + "path_in_archive": "deep3_antiqua-hist", + "description": "Model trained on 16th to 19th century Antiqua, like antiqua-historical but deeper (5-fold ensemble, nlbin, NFC) - val_CER=0.5%", + "size": 92614001, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/deep3_antiqua-15-16-cent.tar.gz", + "type": "archive", + "name": "deep3_antiqua-15-16-cent", + "path_in_archive": "deep3_antiqua-15-16-cent", + "description": "Model trained on 15th and 16th century Latin Antiqua, like 
zpd-latin-script-hist-3 but deeper (5-fold ensemble, nlbin, NFC) - val_CER=0.5%", + "size": 92627999, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/deep3_lsh4.tar.gz", + "type": "archive", + "name": "deep3_lsh4", + "path_in_archive": "deep3_lsh4", + "description": "Model trained on 15th and 19th century on German, Latin, French etc. in Antiqua and Fraktur, like gt4histocr but deeper (5-fold ensemble, nlbin, NFC) - val_CER=1.6%", + "size": 92579708, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/deep3_htr-bastard.tar.gz", + "type": "archive", + "name": "deep3_htr-bastard", + "path_in_archive": "deep3_htr-bastard", + "description": "Model trained on 13th to 16th century German Gothic Bastarda (5-fold ensemble, nlbin, NFC) - val_CER=6.7%", + "size": 91539649, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/deep3_htr-gothic.tar.gz", + "type": "archive", + "name": "deep3_htr-gothic", + "path_in_archive": "deep3_htr-gothic", + "description": "Model trained on 13th to 16th century German Gothic Cursiva (5-fold ensemble, nlbin, NFC) - val_CER=2.5%", + "size": 91499098, + "version_range": ">= 2.0.0" + }, + { + "url": "https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.3/def_arabic.tar.gz", + "type": "archive", + "name": "def_arabic", + "path_in_archive": "def_arabic", + "description": "Model trained for Arabic on ? 
(5-fold ensemble, nlbin, NFC) - val_CER=1.2%", + "size": 30651139, + "version_range": ">= 2.0.0" + } ] } } diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index edf69b9..3f5199d 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -1,393 +1,699 @@ from __future__ import absolute_import +from typing import Optional +from functools import cached_property import itertools -import os from glob import glob +import queue +import multiprocessing as mp +from threading import Thread +import logging +import weakref import numpy as np -from ocrd import Processor -from ocrd_modelfactory import page_from_file +import cv2 as cv +from ocrd import Processor, OcrdPage, OcrdPageResult from ocrd_models.ocrd_page import ( CoordsType, GlyphType, TextEquivType, WordType, - to_xml, ) from ocrd_utils import ( - MIMETYPE_PAGE, - assert_file_grp_cardinality, + VERSION as OCRD_VERSION, coordinates_for_segment, - getLogger, - make_file_id, points_from_polygon, polygon_from_x0y0x1y1, tf_disable_interactive_logs, + initLogging, + config ) -# Disable tensorflow/keras logging via print before importing calamari -# (and disable ruff's import checks and sorting here) -# ruff: noqa: E402 -# ruff: isort: off -tf_disable_interactive_logs() - -from tensorflow import __version__ as tensorflow_version -from calamari_ocr import __version__ as calamari_version -from calamari_ocr.ocr import MultiPredictor -from calamari_ocr.ocr.voting import voter_from_proto -from calamari_ocr.proto import VoterParams -from tensorflow import config as tensorflow_config - # ruff: isort: on -from ocrd_calamari.config import OCRD_TOOL - -TOOL = "ocrd-calamari-recognize" +# BATCH_SIZE = 96 # size at smallest bound +# GROUP_BOUNDS = [100, 200, 400, 800, 1600, 3200, 6400] +# # default tfaip bucket_batch_sizes is buggy (inverse quotient) +# BATCH_GROUPS = [max(1, (min(GROUP_BOUNDS) * BATCH_SIZE) // length) +# for length in GROUP_BOUNDS] + [1] +# we cannot use bucket_by_sequence_length 
(variable batch size), +# because that would require exhausting the iterator +BATCH_SIZE = 12 -BATCH_SIZE = 64 - -def batched_length_limited(iterable, n, limit=32000): - # batched('ABCDEFG', 3) → ABC DEF G - if n < 1: - raise ValueError('n must be at least one') - iterator = iter(iterable) - while batch := tuple(itertools.islice(iterator, n)): - # implement poor man's batch bucketing to avoid OOM: - maxlen = max(image.shape[1] for image in batch) - if maxlen * n > limit and n > 1: - yield from batched_length_limited(batch, n//2) - else: - yield batch class CalamariRecognize(Processor): - def __init__(self, *args, **kwargs): - kwargs["ocrd_tool"] = OCRD_TOOL["tools"][TOOL] - kwargs["version"] = "%s (calamari %s, tensorflow %s)" % ( - OCRD_TOOL["version"], - calamari_version, - tensorflow_version, + @property + def executable(self): + return 'ocrd-calamari-recognize' + + def show_version(self): + from tensorflow import __version__ as tensorflow_version + from calamari_ocr import __version__ as calamari_version + from tfaip import __version__ as tfaip_version + print(f"Version {self.version}, " + f"calamari {calamari_version}, " + f"tfaip {tfaip_version}, " + f"tensorflow {tensorflow_version}, " + f"ocrd/core {OCRD_VERSION}" ) - super(CalamariRecognize, self).__init__(*args, **kwargs) - if hasattr(self, "output_file_grp"): - # processing context - self.setup() def setup(self): """ Set up the model prior to processing. 
""" - log = getLogger("processor.CalamariRecognize") - devices = tensorflow_config.list_physical_devices("GPU") - for device in devices: - log.info("using GPU device %s", device) - tensorflow_config.experimental.set_memory_growth(device, True) - resolved = self.resolve_resource(self.parameter["checkpoint_dir"]) - checkpoints = glob("%s/*.ckpt.json" % resolved) - self.predictor = MultiPredictor(checkpoints=checkpoints, batch_size=BATCH_SIZE) - log.info("loaded model %s", resolved) - - self.network_input_channels = self.predictor.predictors[ - 0 - ].network.input_channels - # not used: - # self.network_input_channels = \ - # self.predictor.predictors[0].network_params.channels - # not used: - # binarization = \ - # self.predictor.predictors[0].model_params\ - # .data_preprocessor.binarization + # binarization = any(isinstance(preproc, calamari_ocr.ocr.dataset.imageprocessors.center_normalizer.CenterNormalizerProcessorParams) for preproc in self.predictor.data.params.pre_proc.processors) # self.features = ('' if self.network_input_channels != 1 else # 'binarized' if binarization != 'GRAY' else # 'grayscale_normalized') self.features = "" - voter_params = VoterParams() - voter_params.type = VoterParams.Type.Value(self.parameter["voter"].upper()) - self.voter = voter_from_proto(voter_params) - - def process(self): + # Run in a background thread so GPU parts can be interleaved with CPU pre-/post-processing across pages. + # We cannot use a ProcessPoolExecutor (or even ThreadPoolExecutor) for this, + # because that relies on threads to set up IPC, but when process_workspace + # starts forking/spawning subprocesses, these threads will break. + # (And we cannot use multithreading for process_workspace either, because + # Python's GIL would not allow true multiscalar computation in the first place.) + # So instead, here we set up our own subprocess+queueing solution.
+ self.predictor = CalamariPredictor( + self.parameter['device'], + self.parameter["voter"], + self.resolve_resource(self.parameter["checkpoint_dir"]) + ) + self.logger.debug("model's network_input_channels is %d", self.network_input_channels) + + @cached_property + def network_input_channels(self): + # as a special case, this information from the model is needed prior to + # prediction, but must be retrieved from the background process as soon as + # the model is loaded, so this will block upon first invocation + input_channels = self.predictor.network_input_channels + return input_channels + + def shutdown(self): + if getattr(self, 'predictor', None): + self.predictor.shutdown() + del self.predictor + + def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: """ - Perform text recognition with Calamari on the workspace. + Perform text recognition with Calamari. If ``texequiv_level`` is ``word`` or ``glyph``, then additionally create word / glyph level segments by splitting at white space characters / glyph boundaries. In the case of ``glyph``, add all alternative character hypotheses down to ``glyph_conf_cutoff`` confidence threshold. 
""" - log = getLogger("processor.CalamariRecognize") - - assert_file_grp_cardinality(self.input_file_grp, 1) - assert_file_grp_cardinality(self.output_file_grp, 1) - - for n, input_file in enumerate(self.input_files): - page_id = input_file.pageId or input_file.ID - log.info("INPUT FILE %i / %s", n, page_id) - pcgts = page_from_file(self.workspace.download_file(input_file)) + pcgts = input_pcgts[0] + page = pcgts.get_Page() + page_image, page_coords, page_image_info = self.workspace.image_from_page( + page, page_id, feature_selector=self.features + ) - page = pcgts.get_Page() - page_image, page_coords, page_image_info = self.workspace.image_from_page( - page, page_id, feature_selector=self.features + tasks = [] + class TaskThread(Thread): + def run(self): + try: + super().run() + self.exc = None + except Exception as exc: + self.exc = exc + def join(self, timeout=None): + super().join(timeout=timeout) + if self.exc: + raise self.exc from None + maxw = 0 + for region in page.get_AllRegions(classes=["Text"]): + region_image, region_coords = self.workspace.image_from_segment( + region, page_image, page_coords, feature_selector=self.features ) - lines = [] - for region in page.get_AllRegions(classes=["Text"]): - region_image, region_coords = self.workspace.image_from_segment( - region, page_image, page_coords, feature_selector=self.features + textlines = region.get_TextLine() + self.logger.info( + "About to recognize %i lines of region '%s'", + len(textlines), + region.id, + ) + for line in textlines: + self.logger.debug( + "Recognizing line '%s' in region '%s'", line.id, region.id ) - textlines = region.get_TextLine() - log.info( - "About to recognize %i lines of region '%s'", - len(textlines), - region.id, + line_image, line_coords = self.workspace.image_from_segment( + line, + region_image, + region_coords, + feature_selector=self.features, ) - for line in textlines: - log.debug( - "Recognizing line '%s' in region '%s'", line.id, region.id + if ( + "binarized" not 
in line_coords["features"] + and "grayscale_normalized" not in line_coords["features"] + and self.network_input_channels == 1 + ): + # We cannot use a feature selector for this since we don't + # know whether the model expects (has been trained on) + # binarized or grayscale images; but raw images are likely + # always inadequate: + self.logger.warning( + "Using raw image for line '%s' in region '%s'", + line.id, + region.id, ) - line_image, line_coords = self.workspace.image_from_segment( - line, - region_image, - region_coords, - feature_selector=self.features, - ) - if ( - "binarized" not in line_coords["features"] - and "grayscale_normalized" not in line_coords["features"] - and self.network_input_channels == 1 - ): - # We cannot use a feature selector for this since we don't - # know whether the model expects (has been trained on) - # binarized or grayscale images; but raw images are likely - # always inadequate: - log.warning( - "Using raw image for line '%s' in region '%s'", - line.id, - region.id, - ) - - if ( - not all(line_image.size) - or line_image.height <= 8 - or line_image.width <= 8 - or "binarized" in line_coords["features"] - and line_image.convert("1").getextrema()[0] == 255 - ): - # empty size or too tiny or no foreground at all: skip - log.warning( - "Skipping empty line '%s' in region '%s'", - line.id, - region.id, - ) - continue - lines.append((line, line_coords, np.array(line_image, dtype=np.uint8))) - - if len(lines): - lines, coords, images = zip(*lines) - else: - log.warning("No text lines on page '%s'", page_id) - lines, coords, images = [], [], [] - - # not exposed in MultiPredictor yet, cf. 
calamari#361: - # raw_results_all = self.predictor.predict_raw(images, progress_bar=False, batch_size=BATCH_SIZE) - # avoid too large a batch size (causing OOM on CPU or GPU) - fun = lambda x: self.predictor.predict_raw(x, progress_bar=False) - results = itertools.chain.from_iterable( - map(fun, batched_length_limited(images, BATCH_SIZE))) - - for line, line_coords, raw_results in zip(lines, coords, results): - for i, p in enumerate(raw_results): - p.prediction.id = "fold_{}".format(i) - - prediction = self.voter.vote_prediction_result(raw_results) - prediction.id = "voted" - - # Build line text on our own - # - # Calamari does whitespace post-processing on prediction.sentence, - # while it does not do the same on prediction.positions. Do it on - # our own to have consistency. - # - # XXX Check Calamari's built-in post-processing on - # prediction.sentence - - def _sort_chars(p): - """Filter and sort chars of prediction p""" - chars = p.chars - chars = [ - c for c in chars if c.char - ] # XXX Note that omission probabilities are not normalized?! 
- chars = [ - c - for c in chars - if c.probability >= self.parameter["glyph_conf_cutoff"] - ] - chars = sorted(chars, key=lambda k: k.probability, reverse=True) - return chars - - def _drop_leading_spaces(positions): - return list( - itertools.dropwhile( - lambda p: _sort_chars(p)[0].char == " ", positions - ) + line_img = load_image( + np.array(line_image, dtype=np.uint8), + self.network_input_channels + ) + if ( + not all(line_image.size) + or line_image.height <= 8 + or line_image.width <= 8 + or "binarized" in line_coords["features"] + and line_img.min() == 255 + ): + # empty size or too tiny or no foreground at all: skip + self.logger.warning( + "Skipping empty line '%s' in region '%s'", + line.id, + region.id, ) + continue + + tasks.append(TaskThread(target=self._process_line, + args=(line, line_coords, line_img, page_id), + name="LinePredictor-%s-%s" % (page_id, line.id))) + tasks[-1].start() + + if not len(tasks): + self.logger.warning("No text lines on page '%s'", page_id) + return OcrdPageResult(pcgts) + + # We cannot delegate to predictor.predict_raw directly... + # predictions = self.predictor.predict_raw(images) + # ...because for efficiency, all page tasks must be synchronised + # on a single GPU-bound subprocess (no more than 1 simultaneous call). + # Moreover, we also cannot use predict_raw indirectly... + # taskq.put((page_id, images)) + # page_id, images = taskq.get() + # result = predictor.predict_raw(images) + # resultq.put((page_id, result)) + # predictions = resultq.get(page_id) + # ...because this would create a new pipeline for each page, + # which is wildly inefficient. + # Moreover, predict_raw() uses predict_dataset(), which is peaky + # itself. + # Instead, we interleave and flow line images from all pages into + # a pipeline based on predict_on_batch(), which gets set up only once. + # Each sample is annotated with page+line metadata for re-identification. 
+ # All page workers (subprocesses) communicate with the single predictor worker + # (subprocess) via queues and a single lock that controls whether or not batches + # are filled up with dummy data (as long as workers are still waiting for results). + Thread(target=self.predictor.fill.acquire, name="PagePredictor-fillneededby-%s" % page_id).start() + for task in tasks: + task.join() + Thread(target=self.predictor.fill.release, name="PagePredictor-fillnotneededby-%s" % page_id).start() + self.logger.info("All lines completed for page '%s'", page_id) + + _page_update_higher_textequiv_levels("line", pcgts) + return OcrdPageResult(pcgts) + + def _process_line(self, line, line_coords, line_image, page_id): + self.logger.debug("Sending line image for page '%s' line '%s'", page_id, line.id) + result = self.predictor(line_image, line.id, page_id) + self.logger.debug("Received line result for page '%s' line '%s'", page_id, line.id) + self._post_process_line(line, line_image.shape[0], line_coords, result) + + def _post_process_line(self, line, line_height, line_coords, result): + _, prediction = result + + # Build line text on our own + # + # Calamari does whitespace post-processing on prediction.sentence, + # while it does not do the same on prediction.positions. Do it on + # our own to have consistency. + # + # XXX Check Calamari's built-in post-processing on + # prediction.sentence + + def _sort_chars(p): + """Filter and sort chars of prediction p""" + chars = p.chars + chars = [ + c for c in chars if c.char + ] # XXX Note that omission probabilities are not normalized?! 
+ chars = [ + c + for c in chars + if c.probability >= self.parameter["glyph_conf_cutoff"] + ] + chars = sorted(chars, key=lambda k: k.probability, reverse=True) + return chars + + def _drop_leading_spaces(positions): + return list( + itertools.dropwhile( + lambda p: _sort_chars(p)[0].char == " ", positions + ) + ) - def _drop_trailing_spaces(positions): - return list(reversed(_drop_leading_spaces(reversed(positions)))) - - def _drop_double_spaces(positions): - def _drop_double_spaces_generator(positions): + def _drop_trailing_spaces(positions): + return list(reversed(_drop_leading_spaces(reversed(positions)))) + + def _drop_double_spaces(positions): + def _drop_double_spaces_generator(positions): + last_was_space = False + for p in positions: + if p.chars[0].char == " ": + if not last_was_space: + yield p + last_was_space = True + else: + yield p last_was_space = False - for p in positions: - if p.chars[0].char == " ": - if not last_was_space: - yield p - last_was_space = True - else: - yield p - last_was_space = False - - return list(_drop_double_spaces_generator(positions)) - - positions = prediction.positions - positions = _drop_leading_spaces(positions) - positions = _drop_trailing_spaces(positions) - positions = _drop_double_spaces(positions) - positions = list(positions) - - line_text = "".join(_sort_chars(p)[0].char for p in positions) - if line_text != prediction.sentence: - log.warning( - f"Our own line text is not the same as Calamari's:" - f"'{line_text}' != '{prediction.sentence}'" - ) - # Delete existing results - if line.get_TextEquiv(): - log.warning("Line '%s' already contained text results", line.id) - line.set_TextEquiv([]) - if line.get_Word(): - log.warning( - "Line '%s' already contained word segmentation", line.id + return list(_drop_double_spaces_generator(positions)) + + positions = prediction.positions + positions = _drop_leading_spaces(positions) + positions = _drop_trailing_spaces(positions) + positions = _drop_double_spaces(positions) + 
positions = list(positions) + + line_text = "".join(_sort_chars(p)[0].char for p in positions) + if line_text != prediction.sentence: + self.logger.warning( + f"Our own line text is not the same as Calamari's:" + f"'{line_text}' != '{prediction.sentence}'" + ) + + # Delete existing results + if line.get_TextEquiv(): + self.logger.warning("Line '%s' already contained text results", line.id) + line.set_TextEquiv([]) + if line.get_Word(): + self.logger.warning( + "Line '%s' already contained word segmentation", line.id + ) + line.set_Word([]) + + # Save line results + line_conf = prediction.avg_char_probability + line.set_TextEquiv( + [TextEquivType(Unicode=line_text, conf=line_conf)] + ) + + # Save word results + # + # Calamari OCR does not provide word positions, so we infer word + # positions from a. text segmentation and b. the glyph positions. + # This is necessary because the PAGE XML format enforces a strict + # hierarchy of lines > words > glyphs. + # + # FIXME: use calamari#282 for this + + def _words(s): + """Split words based on spaces and include spaces as 'words'""" + spaces = None + word = "" + for c in s: + if c == " " and spaces is True: + word += c + elif c != " " and spaces is False: + word += c + else: + if word: + yield word + word = c + spaces = c == " " + yield word + + if self.parameter["textequiv_level"] in ["word", "glyph"]: + word_no = 0 + i = 0 + + for word_text in _words(line_text): + word_length = len(word_text) + if not all(c == " " for c in word_text): + word_positions = positions[i : i + word_length] + word_start = word_positions[0].global_start + word_end = word_positions[-1].global_end + + polygon = polygon_from_x0y0x1y1( + [word_start, 0, word_end, line_height] ) - line.set_Word([]) + points = points_from_polygon( + coordinates_for_segment(polygon, None, line_coords) + ) + # XXX Crop to line polygon? 
- # Save line results - line_conf = prediction.avg_char_probability - line.set_TextEquiv( - [TextEquivType(Unicode=line_text, conf=line_conf)] - ) + word = WordType( + id="%s_word%04d" % (line.id, word_no), + Coords=CoordsType(points), + ) + word.add_TextEquiv(TextEquivType(Unicode=word_text)) - # Save word results - # - # Calamari OCR does not provide word positions, so we infer word - # positions from a. text segmentation and b. the glyph positions. - # This is necessary because the PAGE XML format enforces a strict - # hierarchy of lines > words > glyphs. - - def _words(s): - """Split words based on spaces and include spaces as 'words'""" - spaces = None - word = "" - for c in s: - if c == " " and spaces is True: - word += c - elif c != " " and spaces is False: - word += c - else: - if word: - yield word - word = c - spaces = c == " " - yield word - - if self.parameter["textequiv_level"] in ["word", "glyph"]: - word_no = 0 - i = 0 - - for word_text in _words(line_text): - word_length = len(word_text) - if not all(c == " " for c in word_text): - word_positions = positions[i : i + word_length] - word_start = word_positions[0].global_start - word_end = word_positions[-1].global_end + if self.parameter["textequiv_level"] == "glyph": + for glyph_no, p in enumerate(word_positions): + glyph_start = p.global_start + glyph_end = p.global_end polygon = polygon_from_x0y0x1y1( - [word_start, 0, word_end, line_image.height] + [ + glyph_start, + 0, + glyph_end, + line_height, + ] ) points = points_from_polygon( - coordinates_for_segment(polygon, None, line_coords) + coordinates_for_segment( + polygon, None, line_coords + ) ) - # XXX Crop to line polygon? 
- word = WordType( - id="%s_word%04d" % (line.id, word_no), + glyph = GlyphType( + id="%s_glyph%04d" % (word.id, glyph_no), Coords=CoordsType(points), ) - word.add_TextEquiv(TextEquivType(Unicode=word_text)) - - if self.parameter["textequiv_level"] == "glyph": - for glyph_no, p in enumerate(word_positions): - glyph_start = p.global_start - glyph_end = p.global_end - - polygon = polygon_from_x0y0x1y1( - [ - glyph_start, - 0, - glyph_end, - line_image.height, - ] - ) - points = points_from_polygon( - coordinates_for_segment( - polygon, None, line_coords - ) - ) - glyph = GlyphType( - id="%s_glyph%04d" % (word.id, glyph_no), - Coords=CoordsType(points), + # Add predictions (= TextEquivs) + char_index_start = 1 + # Index must start with 1, see + # https://ocr-d.github.io/page#multiple-textequivs + for char_index, char in enumerate( + _sort_chars(p), start=char_index_start + ): + glyph.add_TextEquiv( + TextEquivType( + Unicode=char.char, + index=char_index, + conf=char.probability, ) + ) - # Add predictions (= TextEquivs) - char_index_start = 1 - # Index must start with 1, see - # https://ocr-d.github.io/page#multiple-textequivs - for char_index, char in enumerate( - _sort_chars(p), start=char_index_start - ): - glyph.add_TextEquiv( - TextEquivType( - Unicode=char.char, - index=char_index, - conf=char.probability, - ) - ) - - word.add_Glyph(glyph) - - line.add_Word(word) - word_no += 1 - - i += word_length - - _page_update_higher_textequiv_levels("line", pcgts) - - # Add metadata about this operation and its runtime parameters: - self.add_metadata(pcgts) - file_id = make_file_id(input_file, self.output_file_grp) - pcgts.set_pcGtsId(file_id) - self.workspace.add_file( - file_id=file_id, - file_grp=self.output_file_grp, - page_id=input_file.pageId, - mimetype=MIMETYPE_PAGE, - local_filename=os.path.join(self.output_file_grp, file_id + ".xml"), - content=to_xml(pcgts), + word.add_Glyph(glyph) + + line.add_Word(word) + word_no += 1 + + i += word_length + +class 
CalamariPredictor: + class PredictWorker(mp.Process): + def __init__(self, logger, device, voter, checkpoint_dir, taskq, resultq, terminate, fill): + self.logger = logger # FIXME: synchronize loggers, too + #self.logger.setLevel(logging.DEBUG) + self.device = device + self.voter = voter + self.checkpoint_dir = checkpoint_dir + self.taskq = taskq + self.resultq = resultq + self.terminate = terminate + self.fill = fill + super().__init__() + def put(self, result): + while not self.terminate.is_set(): + try: + self.resultq.put(result, timeout=0.3) + return + except queue.Full: + continue + page_id = result[0] + if page_id != "none": + self.logger.warning("dropping result for page '%s'", page_id) + def run(self): + initLogging() + tf_disable_interactive_logs() + try: + predictor = self.setup_predictor() + generator = self.setup_pipelines(predictor) + generator = iter(generator()) + self.put(("input_channels", predictor.data.params.input_channels)) + except Exception as e: + self.logger.exception("setup failed") + self.put(("input_channels", e)) + # unrecoverable + self.terminate.set() + while not self.terminate.is_set(): + try: + prediction = next(generator) + page_id, line_id = prediction.meta["id"] + result = prediction.outputs + self.put((page_id, line_id, result)) + self.logger.debug("sent result for page '%s' line '%s'", page_id, line_id) + except StopIteration: + self.logger.info("prediction exhausted generator") + # unrecoverable + self.terminate.set() + except KeyboardInterrupt: + self.terminate.set() + except Exception as e: + # full traceback gets shown when base Processor handles exception + self.logger.error("prediction failed: %s", e.__class__.__name__) + self.put(("", "", e)) # for which page/line?? + # Not only would we have to re-initialize Tensorflow here, + # we cannot even discern which tasks/pages the error occurred on, + # so there will be some worker waiting for results inevitably... 
+ self.terminate.set() + self.logger.debug("terminating predictor: closing result queue") + self.resultq.close() + self.resultq.cancel_join_thread() + def setup_predictor(self): + """ + Set up the model prior to processing. + """ + from calamari_ocr.ocr.predict.predictor import MultiPredictor, PredictorParams + from calamari_ocr.ocr.voting import VoterParams, VoterType + from tfaip.data.databaseparams import DataPipelineParams + from tfaip import DeviceConfigParams + from tfaip.device.device_config import DistributionStrategy + import tensorflow as tf + # unfortunately, tfaip device selector is mandatory and does not provide auto-detection + if self.device < 0: + gpus = [] + self.logger.debug("running on CPU") + elif self.device < len(tf.config.list_physical_devices("GPU")): + gpus = [self.device] + self.logger.info("running on selected GPU device cuda:%d", self.device) + else: + gpus = [] + self.logger.warning("running on CPU because selected GPU device cuda:%d is not available", self.device) + # load model + pred_params = PredictorParams( + silent=True, + progress_bar=False, + device=DeviceConfigParams( + gpus=gpus, + soft_device_placement=False, + #gpu_memory=7000, # limit to 7GB (logical, no dynamic growth) + #dist_strategy=DistributionStrategy.CENTRAL_STORAGE, + ), + pipeline=DataPipelineParams( + batch_size=BATCH_SIZE, + # Number of processes for data loading. 
+ num_processes=4, + use_shared_memory=True, + # group lines with similar lengths to reduce need for padding + # and optimally utilise batch size; + # unfortunately, we cannot use this in an infinite generator + # setting, because TF's bucket_by_sequence_length sometimes + # wants to read ahead for optimal group allocation, which can + # result in deadlocks (because the page workers cannot finish + # unless the already sent batches are returned), so bucketing + # must be disabled: + #bucket_boundaries=GROUP_BOUNDS, + #bucket_batch_sizes=BATCH_GROUPS, + ) ) + voter_params = VoterParams() + voter_params.type = VoterType(self.voter) + # + checkpoints = glob("%s/*.ckpt.json" % self.checkpoint_dir) + self.logger.info("loading %d checkpoints", len(checkpoints)) + predictor = MultiPredictor.from_paths( + checkpoints, + voter_params=voter_params, + predictor_params=pred_params, + ) + #predictor.data.params.pre_proc.run_parallel = False + #predictor.data.params.post_proc.run_parallel = False + def element_length_fn(x): + return x["img_len"] + predictor.data.element_length_fn=lambda: element_length_fn + # rewrap voter JoinedModel and compile (to avoid repeating for each page): + class WrappedModel(tf.keras.models.Model): + def call(self, inputs, training=None, mask=None): + inputs, meta = inputs + return inputs, predictor._keras_model(inputs), meta + predictor.model = WrappedModel() + # for preproc in predictor.data.params.pre_proc.processors: + # self.logger.info("preprocessor: %s", str(preproc)) + predictor.voter = predictor.create_voter(predictor.data.params) + return predictor + def setup_pipelines(self, predictor): + # set up pipeline and generators (as infinite dataset) + from dataclasses import field, dataclass + from paiargparse import pai_dataclass + from tfaip import Sample + from tfaip.data.databaseparams import DataGeneratorParams + from tfaip.data.pipeline.datapipeline import DataPipeline + from tfaip.data.pipeline.datagenerator import DataGenerator + from 
tfaip.data.pipeline.runningdatapipeline import _wrap_dataset + @pai_dataclass + @dataclass + class QueueDataGeneratorParams(DataGeneratorParams): + terminate : mp.Event = field(default=None) + fill : mp.Lock = field(default=None) + taskq : mp.Queue = field(default=None) + @staticmethod + def cls(): + return QueueDataGenerator + class QueueDataGenerator(DataGenerator[QueueDataGeneratorParams]): + def __len__(self): + raise NotImplementedError() + def generate(self): + while not self.params.terminate.is_set(): + try: + page_id, line_id, image = self.params.taskq.get(timeout=1.1) + except queue.Empty: + # anyone currently awaiting results? + if self.params.fill.acquire(block=False): + self.params.fill.release() # not needed + else: + # stuff with empty images to prevent pipeline / batching stall + # width=2: will be padded to batch anyway + yield Sample(inputs=np.ones((48, 2, predictor.data.params.input_channels), dtype=np.uint8), meta={"id": ("none", "none")}) + continue + #print(f"feeding another input page {page_id} line {line_id}") + yield Sample(inputs=image, meta={"id": (page_id, line_id)}) + class QueueDataPipeline(DataPipeline): + def create_data_generator(self): + return QueueDataGenerator(mode=self.mode, params=self.generator_params) + def input_dataset(self, auto_repeat=None): + gen = self.generate_input_samples(auto_repeat=auto_repeat) + #return gen.as_dataset(self._create_tf_dataset_generator()) + gen.running_pipeline = gen.processor_pipeline_params.create(gen.pipeline_params, gen.data_params) + def generator(): + running_samples_generator = gen._generate_input_samples() + for sample in running_samples_generator: + #print(f"feeding another input {sample.meta} len={sample.inputs['img'].shape[0]}") + yield sample + running_samples_generator.close() + dataset = self._create_tf_dataset_generator().create(generator, False) + def print_fn(*x): + import tensorflow as tf + tf.print(tf.shape(x[0]["img"])) + return x + #dataset = dataset.map(print_fn) + dataset = 
_wrap_dataset( + self.mode, dataset, self.pipeline_params, self.data, False + ) + #dataset = dataset.map(print_fn) + return dataset + self.logger.debug("setting up input pipeline") + input_pipeline = QueueDataPipeline( + predictor.params.pipeline, predictor._data, + QueueDataGeneratorParams(terminate=self.terminate, fill=self.fill, taskq=self.taskq)) + from tfaip.predict.predictorbase import data_adapter + from tfaip.util.tftyping import sync_to_numpy_or_python_type + from tfaip.data.pipeline.processor.params import SequentialProcessorPipelineParams + from tfaip.predict.multimodelpostprocessor import MultiModelPostProcessorParams + self.logger.debug("instantiating input dataset") + tf_dataset = input_pipeline.input_dataset() + import tensorflow as tf + tf_dataset = tf_dataset.apply( + tf.data.experimental.ignore_errors(log_warning=True) + ) + self.logger.debug("setting up output pipeline") + def predict_dataset(dataset): + for batch in dataset: + #ids = sync_to_numpy_or_python_type(batch[1]['meta']) + #ids = [json.loads(l[0])['id'][1] for l in ids] + #print(f"batch size: {batch[0]['img'].shape} {ids.count('none')/len(ids)*100}%") + r = predictor.model.predict_on_batch(batch) + inputs, outputs, meta = sync_to_numpy_or_python_type(r) + for sample in predictor._unwrap_batch(inputs, {}, outputs, meta): + #print(f"feeding another output {sample.meta}") + yield sample + post_processors = [ + d.get_or_create_pipeline(predictor.params.pipeline, input_pipeline.generator_params).create_output_pipeline() + for d in predictor.datas + ] + post_proc_pipeline = SequentialProcessorPipelineParams( + processors=[MultiModelPostProcessorParams(voter=predictor.voter, post_processors=post_processors)], + run_parallel=predictor.data.params.post_proc.run_parallel, + num_threads=predictor.data.params.post_proc.num_threads, + max_tasks_per_process=predictor.data.params.post_proc.max_tasks_per_process, + ).create(input_pipeline.pipeline_params, predictor.data.params) + def 
output_generator(): + for sample in post_proc_pipeline.apply(predict_dataset(tf_dataset)): + yield predictor.voter.finalize_sample(sample) + return output_generator + + def __init__(self, device, voter, checkpoint_dir): + self.logger = logging.getLogger("ocrd.processor.CalamariPredictor") + #self.logger.setLevel(logging.DEBUG) + ctxt = mp.get_context('spawn') # not necessary to fork, and spawn is safer + self.taskq = ctxt.Queue(maxsize=3 + config.OCRD_MAX_PARALLEL_PAGES * 200) # 3 + npages * nlines + self.resultq = ctxt.Queue(maxsize=3 + config.OCRD_MAX_PARALLEL_PAGES * 200) + self.terminate = ctxt.Event() # will be shared across all page workers forked from this process + self.fill = ctxt.Lock() # to switch on/off filling up batches in the continuous generator + # spawn single Calamari subprocess prior to base Processor forking any page worker subprocesses + CalamariPredictor.PredictWorker(self.logger, device, voter, checkpoint_dir, + self.taskq, self.resultq, self.terminate, self.fill).start() + id_, self.network_input_channels = self.resultq.get() # block until initialized + assert id_ == "input_channels" # sole possible task during setup/init + if isinstance(self.network_input_channels, Exception): + raise self.network_input_channels + self.logger.info("Loaded model") + # ensure multiple CalamariPredictor instances sync communicating with the same PredictWorker: + mgr = mp.get_context("fork").Manager() # base.Processor will fork workers + self.results = mgr.dict() # {} + weakref.finalize(self, self.shutdown) + + def __del__(self): + self.shutdown() # sets self.terminate (on exception or gc) + + def __call__(self, image, line_id, page_id): + self.taskq.put((page_id, line_id, image)) + self.logger.debug("sent image for page '%s' line '%s'", page_id, line_id) + result = self.get(page_id, line_id) + self.logger.debug("received result for page '%s' line '%s'", page_id, line_id) + return result + + def get(self, page_id, line_id): + self.logger.debug("requested 
result for page '%s' line '%s'", page_id, line_id) + err = None + while not self.terminate.is_set(): + if (page_id, line_id) in self.results: + result = self.results.pop((page_id, line_id)) + # if isinstance(result, Exception): + # raise Exception(f"prediction failed for page {page_id}") from result + return result + #self.logger.debug("awaiting result for page '%s' line '%s'", page_id, line_id) + try: + page_id1, line_id1, result = self.resultq.get(timeout=0.7) + except queue.Empty: + continue + # FIXME what if page_id == line_id == "" and result is an exception?? + self.logger.debug("storing results for page '%s' line '%s'", page_id1, line_id1) + self.results[(page_id1, line_id1)] = result + if page_id1 == '' and line_id1 == '': + err = result + for page_id, line_id in self.results.keys(): + if page_id != 'none': + self.logger.warning("dropping results for page '%s'", page_id) + if page_id == '' and line_id == '': + err = self.results[(page_id, line_id)] + raise Exception("predictor terminated prematurely") from err + + def shutdown(self): + self.terminate.set() + # while not self.taskq.empty(): + # page_id, _, _ = self.taskq.get() + # self.logger.warning("dropped task for page %s", page_id) + self.taskq.close() + self.taskq.cancel_join_thread() # TODO: This is a copy of ocrd_tesserocr's function, and should probably be moved to a @@ -432,3 +738,41 @@ def _page_update_higher_textequiv_levels(level, pcgts): for line in lines ) region.set_TextEquiv([TextEquivType(Unicode=region_unicode)]) # remove old + +# from calamari_ocr.utils.image.ImageLoader (but for PIL.Image objects) +# (Calamari2 does not tolerate wrong input shape anymore - +# common preprocessors do not change last dimension) +def load_image(img: np.ndarray, channels: int, to_gray_method : str = "cv") -> np.ndarray: + if len(img.shape) == 2: + img_channels = 1 + elif len(img.shape) == 3: + img_channels = img.shape[-1] + else: + raise ValueError(f"Unknown image format. 
Must bei either WxH or WxHxC, but got {img.shape}.") + + if img_channels == channels: + pass # good + elif img_channels == 2 and channels == 1: + img = img[:, :, 0] + elif img_channels == 3 and channels == 1: + if to_gray_method == "avg": + img = np.mean(img.astype("float32"), axis=-1).astype(dtype=img.dtype) + elif to_gray_method == "cv": + img = cv.cvtColor(img, cv.COLOR_RGB2GRAY) + else: + raise ValueError(f"Unsupported image conversion method {to_gray_method}") + elif img_channels == 4 and channels == 1: + if to_gray_method == "avg": + img = np.mean(img[:, :, :3].astype("float32"), axis=-1).astype(dtype=img.dtype) + elif to_gray_method == "cv": + img = cv.cvtColor(img, cv.COLOR_RGBA2GRAY) + else: + raise ValueError(f"Unsupported image conversion method {to_gray_method}") + elif img_channels == 1 and channels == 3: + img = np.stack([img] * 3, axis=-1) + else: + raise ValueError( + f"Unsupported image format. Trying to convert from {img_channels} channels to " + f"{channels} channels." 
+ ) + return img diff --git a/ocrd_calamari/util.py b/ocrd_calamari/util.py deleted file mode 100644 index 33d5297..0000000 --- a/ocrd_calamari/util.py +++ /dev/null @@ -1,15 +0,0 @@ -import os - - -class working_directory: - """Context manager to temporarily change the working directory""" - - def __init__(self, wd): - self.wd = wd - - def __enter__(self): - self.old_wd = os.getcwd() - os.chdir(self.wd) - - def __exit__(self, etype, value, traceback): - os.chdir(self.old_wd) diff --git a/pyproject.toml b/pyproject.toml index 8646876..133190d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Environment :: Console", "Intended Audience :: Science/Research", "Intended Audience :: Other Audience", - "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", "Topic :: Text Processing", @@ -76,6 +76,10 @@ branch = true source = [ "ocrd_calamari" ] +concurrency = [ + "thread", + "multiprocessing" +] [tool.coverage.report] exclude_also = [ diff --git a/requirements.txt b/requirements.txt index 3a96004..f2d38b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ -tensorflow >= 2.5.0, < 2.16 numpy -calamari-ocr == 1.0.*, >= 1.0.7 -setuptools >= 41.0.0 # tensorboard depends on this, but why do we get an error at runtime? 
+calamari-ocr >= 2.3.1 +tensorflow < 2.16, != 2.12.0 # avoid Keras 3, avoid broken release click -ocrd >= 2.54.0 +ocrd >= 3.0.0b7 diff --git a/test/base.py b/test/base.py deleted file mode 100644 index d2dc025..0000000 --- a/test/base.py +++ /dev/null @@ -1,7 +0,0 @@ -from test.assets import assets - -from ocrd_utils import initLogging - -initLogging() - -__all__ = ["assets"] diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..68175a7 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,69 @@ +from multiprocessing import Process +from time import sleep +import gc +import pytest + +from ocrd import Resolver, Workspace, OcrdMetsServer +from ocrd_utils import pushd_popd, initLogging, disableLogging, setOverrideLogLevel, getLogger, config + +from .assets import assets + +CONFIGS = ['', 'metsserver+metscache', 'pageparallel', 'pageparallel+metscache'] + +@pytest.fixture(params=CONFIGS) +def workspace(tmpdir, pytestconfig, request): + def _make_workspace(workspace_path): + initLogging() + if pytestconfig.getoption('verbose') > 0: + setOverrideLogLevel('DEBUG') + with pushd_popd(tmpdir): + directory = str(tmpdir) + resolver = Resolver() + workspace = resolver.workspace_from_url(workspace_path, dst_dir=directory, download=True) + config.OCRD_MISSING_OUTPUT = "ABORT" + if 'metscache' in request.param: + config.OCRD_METS_CACHING = True + print("enabled METS caching") + if 'pageparallel' in request.param: + config.OCRD_MAX_PARALLEL_PAGES = 4 + print("enabled page-parallel processing") + if 'pageparallel' in request.param or 'metsserver' in request.param: + def _start_mets_server(*args, **kwargs): + print("running with METS server") + server = OcrdMetsServer(*args, **kwargs) + server.startup() + process = Process(target=_start_mets_server, + kwargs={'workspace': workspace, 'url': 'mets.sock'}) + process.start() + sleep(1) + workspace = Workspace(resolver, directory, mets_server_url='mets.sock') + yield workspace + process.terminate() + 
process.join() + else: + yield workspace + disableLogging() + config.reset_defaults() + gc.collect() + return _make_workspace + + +@pytest.fixture +def workspace_manifesto(workspace): + yield from workspace(assets.path_to('communist_manifesto/data/mets.xml')) + +@pytest.fixture +def workspace_aufklaerung(workspace): + yield from workspace(assets.path_to('kant_aufklaerung_1784/data/mets.xml')) + +@pytest.fixture +def workspace_aufklaerung_binarized(workspace): + yield from workspace(assets.path_to('kant_aufklaerung_1784-binarized/data/mets.xml')) + +@pytest.fixture +def workspace_aufklaerung_glyph(workspace): + yield from workspace(assets.path_to('kant_aufklaerung_1784-page-region-line-word_glyph/data/mets.xml')) + +@pytest.fixture +def workspace_sbb(workspace): + yield from workspace(assets.url_of('SBB0000F29300010000/data/mets_one_file.xml')) diff --git a/test/test_recognize.py b/test/test_recognize.py index f4e3587..56d75c4 100644 --- a/test/test_recognize.py +++ b/test/test_recognize.py @@ -1,198 +1,140 @@ import logging import os import shutil -import subprocess -import tempfile -import pytest from lxml import etree -from ocrd.resolver import Resolver +from ocrd import run_processor +from ocrd_utils import MIMETYPE_PAGE as PAGE +from ocrd_models.constants import NAMESPACES as NS +from ocrd_modelfactory import page_from_file from ocrd_calamari import CalamariRecognize -from .base import assets - -METS_KANT = assets.url_of( - "kant_aufklaerung_1784-page-region-line-word_glyph/data/mets.xml" -) -WORKSPACE_DIR = tempfile.mkdtemp(prefix="test-ocrd-calamari-") -CHECKPOINT_DIR = os.getenv("MODEL", "qurator-gt4histocr-1.0") +CHECKPOINT_DIR = os.getenv("MODEL", "fraktur_19th_century") DEBUG = os.getenv("DEBUG", False) -def page_namespace(tree): - """Return the PAGE content namespace used in the given ElementTree. - - This relies on the assumption that, in any given PAGE content file, the root element - has the local name "PcGts". 
We do not check if the files uses any valid PAGE - namespace. - """ - root_name = etree.QName(tree.getroot().tag) - if root_name.localname == "PcGts": - return root_name.namespace - else: - raise ValueError("Not a PAGE tree") - - -def assertFileContains(fn, text): +def assertFileContains(fn, text, msg=""): """Assert that the given file contains a given string.""" with open(fn, "r", encoding="utf-8") as f: - assert text in f.read() + assert text in f.read(), msg -def assertFileDoesNotContain(fn, text): +def assertFileDoesNotContain(fn, text, msg=""): """Assert that the given file does not contain given string.""" with open(fn, "r", encoding="utf-8") as f: - assert text not in f.read() - - -@pytest.fixture -def workspace(): - if os.path.exists(WORKSPACE_DIR): - shutil.rmtree(WORKSPACE_DIR) - os.makedirs(WORKSPACE_DIR) - - resolver = Resolver() - # due to core#809 this does not always work: - # workspace = resolver.workspace_from_url(METS_KANT, dst_dir=WORKSPACE_DIR) - # workaround: - shutil.rmtree(WORKSPACE_DIR) - shutil.copytree(os.path.dirname(METS_KANT), WORKSPACE_DIR) - workspace = resolver.workspace_from_url(os.path.join(WORKSPACE_DIR, "mets.xml")) + assert text not in f.read(), msg - # The binarization options I have are: - # - # a. ocrd_kraken which tries to install cltsm, whose installation is borken on my - # machine (protobuf) - # b. ocrd_olena which 1. I cannot fully install via pip and 2. whose dependency - # olena doesn't compile on my machine - # c. just fumble with the original files - # - # So I'm going for option c. 
- for imgf in workspace.mets.find_files(fileGrp="OCR-D-IMG"): - imgf = workspace.download_file(imgf) - path = os.path.join(workspace.directory, imgf.local_filename) - subprocess.call(["mogrify", "-threshold", "50%", path]) - # Remove GT Words and TextEquivs, to not accidently check GT text instead of the - # OCR text - # XXX Review data again - for of in workspace.mets.find_files(fileGrp="OCR-D-GT-SEG-WORD-GLYPH"): - workspace.download_file(of) - path = os.path.join(workspace.directory, of.local_filename) - tree = etree.parse(path) - nsmap_gt = {"pc": page_namespace(tree)} - for to_remove in ["//pc:Word", "//pc:TextEquiv"]: - for e in tree.xpath(to_remove, namespaces=nsmap_gt): - e.getparent().remove(e) - tree.write(path, xml_declaration=True, encoding="utf-8") - assertFileDoesNotContain(path, "TextEquiv") - - yield workspace - - if not DEBUG: - shutil.rmtree(WORKSPACE_DIR) - - -def test_recognize(workspace): - CalamariRecognize( - workspace, - input_file_grp="OCR-D-GT-SEG-WORD-GLYPH", +def test_recognize(workspace_aufklaerung_binarized, caplog): + caplog.set_level(logging.WARNING) + ws = workspace_aufklaerung_binarized + page1 = ws.mets.physical_pages[0] + file1 = list(ws.find_files(file_grp="OCR-D-GT-WORD", page_id=page1, mimetype=PAGE))[0] + text1 = page_from_file(file1).etree.xpath( + '//page:TextLine/page:TextEquiv[1]/page:Unicode/text()', namespaces=NS) + assert len(text1) > 10 + assert "verſchuldeten" in "\n".join(text1) + run_processor( + CalamariRecognize, + input_file_grp="OCR-D-GT-WORD", output_file_grp="OCR-D-OCR-CALAMARI", parameter={ "checkpoint_dir": CHECKPOINT_DIR, }, - ).process() - workspace.save_mets() - - page1 = os.path.join( - workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_phys_0001.xml" + workspace=ws, ) - assert os.path.exists(page1) - assertFileContains(page1, "verſchuldeten") - - -def test_recognize_should_warn_if_given_rgb_image_and_single_channel_model( - workspace, caplog -): + overwrite_text_log_messages = [t[2] for t in 
caplog.record_tuples + if "already contained text results" in t[2]] + assert len(overwrite_text_log_messages) > 10 # For every line! + overwrite_word_log_messages = [t[2] for t in caplog.record_tuples + if "already contained word segmentation" in t[2]] + assert len(overwrite_word_log_messages) > 10 # For every line! + ws.save_mets() + file1 = next(ws.find_files(file_grp="OCR-D-OCR-CALAMARI", page_id=page1, mimetype=PAGE), False) + assert file1, "result for first page not referenced in METS" + assert os.path.exists(file1.local_filename), "result for first page not found in filesystem" + text1_out = page_from_file(file1).etree.xpath( + '//page:TextLine/page:TextEquiv[1]/page:Unicode/text()', namespaces=NS) + assert len(text1_out) == len(text1), "not all lines have been recognized" + assert "verſchuldeten" in "\n".join(text1_out), "result for first page is inaccurate" + assert "\n".join(text1_out) != "\n".join(text1), "result is suspiciously identical to GT" + + +def test_recognize_rgb(workspace_aufklaerung, caplog): caplog.set_level(logging.WARNING) - CalamariRecognize( - workspace, - input_file_grp="OCR-D-GT-SEG-WORD-GLYPH", - output_file_grp="OCR-D-OCR-CALAMARI-BROKEN", + run_processor( + CalamariRecognize, + input_file_grp="OCR-D-GT-PAGE", + output_file_grp="OCR-D-OCR-CALAMARI", parameter={"checkpoint_dir": CHECKPOINT_DIR}, - ).process() - - interesting_log_messages = [ - t[2] for t in caplog.record_tuples if "Using raw image" in t[2] - ] + workspace=workspace_aufklaerung, + ) + interesting_log_messages = [t[2] for t in caplog.record_tuples + if "Using raw image" in t[2]] assert len(interesting_log_messages) > 10 # For every line! 
-def test_word_segmentation(workspace): - CalamariRecognize( - workspace, - input_file_grp="OCR-D-GT-SEG-WORD-GLYPH", +def test_words(workspace_aufklaerung_binarized): + run_processor( + CalamariRecognize, + input_file_grp="OCR-D-GT-WORD", output_file_grp="OCR-D-OCR-CALAMARI", parameter={ "checkpoint_dir": CHECKPOINT_DIR, - "textequiv_level": "word", # Note that we're going down to word level here + "textequiv_level": "word", }, - ).process() - workspace.save_mets() - - page1 = os.path.join( - workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_phys_0001.xml" + workspace=workspace_aufklaerung_binarized ) - assert os.path.exists(page1) - tree = etree.parse(page1) - nsmap = {"pc": page_namespace(tree)} - - # The result should contain a TextLine that contains the text "December" - line = tree.xpath( - ".//pc:TextLine[pc:TextEquiv/pc:Unicode[contains(text(),'December')]]", - namespaces=nsmap, - )[0] - assert line is not None - + ws = workspace_aufklaerung_binarized + ws.save_mets() + page1 = ws.mets.physical_pages[0] + file1 = next(ws.find_files(file_grp="OCR-D-OCR-CALAMARI", page_id=page1, mimetype=PAGE), False) + assert file1, "result for first page not referenced in METS" + assert os.path.exists(file1.local_filename), "result for first page not found in filesystem" + tree1 = page_from_file(file1).etree + # The result should contain a TextLine that contains the text "Berliniſche" + line = tree1.xpath( + "//page:TextLine[page:TextEquiv/page:Unicode[contains(text(),'Berliniſche')]]", + namespaces=NS, + ) + assert len(line) == 1, "result is inaccurate" + line = line[0] # The textline should # a. contain multiple words and # b. 
these should concatenate fine to produce the same line text - words = line.xpath(".//pc:Word", namespaces=nsmap) - assert len(words) >= 2 + words = line.xpath(".//page:Word", namespaces=NS) + assert len(words) >= 2, "result does not contain words" words_text = " ".join( - word.xpath("pc:TextEquiv/pc:Unicode", namespaces=nsmap)[0].text + word.xpath("page:TextEquiv[1]/page:Unicode/text()", namespaces=NS)[0] for word in words ) - line_text = line.xpath("pc:TextEquiv/pc:Unicode", namespaces=nsmap)[0].text - assert words_text == line_text - + line_text = line.xpath("page:TextEquiv[1]/page:Unicode/text()", namespaces=NS)[0] + assert words_text == line_text, "word-level text result does not concatenate to line-level text result" # For extra measure, check that we're not seeing any glyphs, as we asked for # textequiv_level == "word" - glyphs = tree.xpath("//pc:Glyph", namespaces=nsmap) - assert len(glyphs) == 0 + glyphs = tree1.xpath("//page:Glyph", namespaces=NS) + assert len(glyphs) == 0, "result must not contain glyph-level segments" -def test_glyphs(workspace): - CalamariRecognize( - workspace, - input_file_grp="OCR-D-GT-SEG-WORD-GLYPH", +def test_glyphs(workspace_aufklaerung_binarized): + run_processor( + CalamariRecognize, + input_file_grp="OCR-D-GT-WORD", output_file_grp="OCR-D-OCR-CALAMARI", parameter={ "checkpoint_dir": CHECKPOINT_DIR, - # Note that we're going down to glyph level here "textequiv_level": "glyph", }, - ).process() - workspace.save_mets() - - page1 = os.path.join( - workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_phys_0001.xml" + workspace=workspace_aufklaerung_binarized, ) - assert os.path.exists(page1) - tree = etree.parse(page1) - nsmap = {"pc": page_namespace(tree)} - + ws = workspace_aufklaerung_binarized + ws.save_mets() + page1 = ws.mets.physical_pages[0] + file1 = next(ws.find_files(file_grp="OCR-D-OCR-CALAMARI", page_id=page1, mimetype=PAGE), False) + assert file1, "result for first page not referenced in METS" + assert 
os.path.exists(file1.local_filename), "result for first page not found in filesystem" + tree1 = page_from_file(file1).etree # The result should contain a lot of glyphs - glyphs = tree.xpath("//pc:Glyph", namespaces=nsmap) - assert len(glyphs) >= 100 + glyphs = tree1.xpath("//page:Glyph", namespaces=NS) + assert len(glyphs) >= 100, "result must contain lots of glyphs"