diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 78cca164..368445eb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,24 +1,126 @@ -name: Build JDK for ARM +name: Build OpenJDK 8 for Android -on: [push] +on: + push: + pull_request: + workflow_dispatch: + schedule: + - cron: '0 0 1 */2 *' jobs: - build: - runs-on: ubuntu-18.04 + build_android: + strategy: + matrix: + arch: [ "aarch32", "aarch64", "x86", "x86_64" ] + fail-fast: false + + name: "Build for Android ${{matrix.arch}}" + runs-on: ubuntu-22.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: set up JDK 1.7 + uses: actions/setup-java@v4 + with: + java-version: 8 + distribution: zulu + - name: Install build dependencies + run: | + sudo apt update + sudo apt -y install autoconf python3 python-is-python3 unzip zip systemtap-sdt-dev gcc-multilib g++-multilib libxtst-dev libasound2-dev libelf-dev libfontconfig1-dev libx11-dev + - name: Build with CI build script + run: bash "ci_build_arch_${{matrix.arch}}.sh" + - name: Upload JDK build output + uses: actions/upload-artifact@v4 + with: + name: "jdk8-${{matrix.arch}}" + path: jdk8*.tar.xz + + - name: Setup tmate session if failed + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + + - name: Upload JRE build output + uses: actions/upload-artifact@v4 + with: + name: 'jre8-${{matrix.arch}}' + path: jre8*.tar.xz + - name: Upload JRE debuginfo build output + uses: actions/upload-artifact@v4 + with: + name: "jre8-debuginfo-${{matrix.arch}}" + path: dizout + + build_iosport: + name: "Build for iOS aarch64" + runs-on: MacStadium + continue-on-error: true steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: set up JDK 1.8 - uses: actions/setup-java@v1 + uses: actions/setup-java@v4 with: - java-version: 1.8 + java-version: 8 + distribution: zulu - name: Build with CI build script - run: ./ci_build.sh + run: | + export BUILD_IOS=1 + #brew 
install awk + #echo "/opt/procursus/bin:/opt/procursus/local/bin:/Users/eilionoir/.local/bin:/opt/homebrew/bin:/Users/eilionoir/Library/Python/3.9/bin:/Library/Frameworks/Python.framework/Versions/3.10/bin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:/opt/procursus/games" > $GITHUB_PATH + export PATH=/opt/procursus/bin:/opt/homebrew/bin:$PATH + bash "ci_build_arch_aarch64.sh" - - name: Upload build output - uses: actions/upload-artifact@v1 + - name: Upload JDK build output + uses: actions/upload-artifact@v4 + with: + name: "jdk8-ios-aarch64" + path: jdk8*.tar.xz + + - name: Upload JRE build output + uses: actions/upload-artifact@v4 + with: + name: 'jre8-ios-aarch64' + path: jre8*.tar.xz + + - name: Upload JRE debuginfo build output + uses: actions/upload-artifact@v4 + with: + name: "jre8-ios-debuginfo-aarch64" + path: dizout + + pojav: + needs: build_android + runs-on: ubuntu-22.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Get jre8-aarch32 + uses: actions/download-artifact@v4 + with: + name: jre8-aarch32 + path: pojav + - name: Get jre8-aarch64 + uses: actions/download-artifact@v4 + with: + name: jre8-aarch64 + path: pojav + - name: Get jre8-x86 + uses: actions/download-artifact@v4 + with: + name: jre8-x86 + path: pojav + - name: Get jre8-x86_64 + uses: actions/download-artifact@v4 + with: + name: jre8-x86_64 + path: pojav + - name: Repack JRE + run: bash "repackjre.sh" $GITHUB_WORKSPACE/pojav $GITHUB_WORKSPACE/pojav/jre8-pojav + - name: Upload artifact + uses: actions/upload-artifact@v4 with: - name: jre - path: jre.tar.xz + name: jre8-pojav + path: pojav/jre8-pojav/* diff --git a/.gitignore b/.gitignore index 47f4a48b..461c1c17 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,19 @@ -cups-2.2.4-source.tar.gz -freetype-2.6.2.tar.bz2 -cups-2.2.4 -freetype-2.6.2 -freetype-2.6.2-x86 -jdk-9.0.4 +cups-*-source.tar.gz +freetype-*.tar.gz +freetype-* +cups-* +dummy_libs 
+freetype-*-x86 jdk1.8.0_* openjdk -android-ndk-r10e -android-ndk-r10e-linux-x86_64.zip +android-ndk-r* jre.tar.xz jreout +.DS_Store +.idea +ios-missing-include/cups +ios-missing-include/X11 +ios-missing-include/Xm +dizout +jreout +jdkout diff --git a/README.md b/README.md new file mode 100644 index 00000000..44784c15 --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# mobile-openjdk8-build-multiarch + +Based on http://openjdk.java.net/projects/mobile/android.html + +## Building + +### Setup +#### Android +- Download Android NDK r10e from https://developer.android.com/ndk/downloads/older_releases.html and place it in this directory (Can't automatically download because of EULA) +- **Warning**: Do not attempt to build using a newer or older NDK; it will lead to compilation errors. + +#### iOS +- You should get the latest Xcode (tested with Xcode 12). + +### Platform and architecture specific environment variables +
+| Platform - Architecture | Environment variable `TARGET` | Environment variable `TARGET_JDK` |
+|---|---|---|
+| Android - armv8/aarch64 | aarch64-linux-android | aarch64 |
+| Android - armv7/aarch32 | arm-linux-androideabi | arm |
+| Android - x86/i686 | i686-linux-android | x86 |
+| Android - x86_64/amd64 | x86_64-linux-android | x86_64 |
+| iOS/iPadOS - armv8/aarch64 | aarch64-macos-ios | aarch64 |
+
UScript
class.
++ *
++ * @stable ICU 2.2
++ */
++
++enum ScriptCodes { /* one constant per script; values are explicit and run contiguously from 0 to 158 */
++ zyyyScriptCode = 0,
++ zinhScriptCode = 1,
++ qaaiScriptCode = zinhScriptCode, /* manually added alias, for API stability */
++ arabScriptCode = 2,
++ armnScriptCode = 3,
++ bengScriptCode = 4,
++ bopoScriptCode = 5,
++ cherScriptCode = 6,
++ coptScriptCode = 7,
++ cyrlScriptCode = 8,
++ dsrtScriptCode = 9,
++ devaScriptCode = 10,
++ ethiScriptCode = 11,
++ georScriptCode = 12,
++ gothScriptCode = 13,
++ grekScriptCode = 14,
++ gujrScriptCode = 15,
++ guruScriptCode = 16,
++ haniScriptCode = 17,
++ hangScriptCode = 18,
++ hebrScriptCode = 19,
++ hiraScriptCode = 20,
++ kndaScriptCode = 21,
++ kanaScriptCode = 22,
++ khmrScriptCode = 23,
++ laooScriptCode = 24,
++ latnScriptCode = 25,
++ mlymScriptCode = 26,
++ mongScriptCode = 27,
++ mymrScriptCode = 28,
++ ogamScriptCode = 29,
++ italScriptCode = 30,
++ oryaScriptCode = 31,
++ runrScriptCode = 32,
++ sinhScriptCode = 33,
++ syrcScriptCode = 34,
++ tamlScriptCode = 35,
++ teluScriptCode = 36,
++ thaaScriptCode = 37,
++ thaiScriptCode = 38,
++ tibtScriptCode = 39,
++/**
++ * @stable ICU 2.6
++ */
++
++ cansScriptCode = 40,
++/**
++ * @stable ICU 2.2
++ */
++
++ yiiiScriptCode = 41,
++ tglgScriptCode = 42,
++ hanoScriptCode = 43,
++ buhdScriptCode = 44,
++ tagbScriptCode = 45,
++/**
++ * @stable ICU 2.6
++ */
++
++ braiScriptCode = 46,
++ cprtScriptCode = 47,
++ limbScriptCode = 48,
++ linbScriptCode = 49,
++ osmaScriptCode = 50,
++ shawScriptCode = 51,
++ taleScriptCode = 52,
++ ugarScriptCode = 53,
++/**
++ * @stable ICU 3.0
++ */
++
++ hrktScriptCode = 54,
++/**
++ * @stable ICU 3.4
++ */
++
++ bugiScriptCode = 55,
++ glagScriptCode = 56,
++ kharScriptCode = 57,
++ syloScriptCode = 58,
++ taluScriptCode = 59,
++ tfngScriptCode = 60,
++ xpeoScriptCode = 61,
++/**
++ * @stable ICU 3.6
++ */
++
++ baliScriptCode = 62,
++ batkScriptCode = 63,
++ blisScriptCode = 64,
++ brahScriptCode = 65,
++ chamScriptCode = 66,
++ cirtScriptCode = 67,
++ cyrsScriptCode = 68,
++ egydScriptCode = 69,
++ egyhScriptCode = 70,
++ egypScriptCode = 71,
++ geokScriptCode = 72,
++ hansScriptCode = 73,
++ hantScriptCode = 74,
++ hmngScriptCode = 75,
++ hungScriptCode = 76,
++ indsScriptCode = 77,
++ javaScriptCode = 78,
++ kaliScriptCode = 79,
++ latfScriptCode = 80,
++ latgScriptCode = 81,
++ lepcScriptCode = 82,
++ linaScriptCode = 83,
++ mandScriptCode = 84,
++ mayaScriptCode = 85,
++ meroScriptCode = 86,
++ nkooScriptCode = 87,
++ orkhScriptCode = 88,
++ permScriptCode = 89,
++ phagScriptCode = 90,
++ phnxScriptCode = 91,
++ plrdScriptCode = 92,
++ roroScriptCode = 93,
++ saraScriptCode = 94,
++ syreScriptCode = 95,
++ syrjScriptCode = 96,
++ syrnScriptCode = 97,
++ tengScriptCode = 98,
++ vaiiScriptCode = 99,
++ vispScriptCode = 100,
++ xsuxScriptCode = 101,
++ zxxxScriptCode = 102,
++ zzzzScriptCode = 103,
++/**
++ * @stable ICU 3.8
++ */
++
++ cariScriptCode = 104,
++ jpanScriptCode = 105,
++ lanaScriptCode = 106,
++ lyciScriptCode = 107,
++ lydiScriptCode = 108,
++ olckScriptCode = 109,
++ rjngScriptCode = 110,
++ saurScriptCode = 111,
++ sgnwScriptCode = 112,
++ sundScriptCode = 113,
++ moonScriptCode = 114,
++ mteiScriptCode = 115,
++/**
++ * @stable ICU 4.0
++ */
++
++ armiScriptCode = 116,
++ avstScriptCode = 117,
++ cakmScriptCode = 118,
++ koreScriptCode = 119,
++ kthiScriptCode = 120,
++ maniScriptCode = 121,
++ phliScriptCode = 122,
++ phlpScriptCode = 123,
++ phlvScriptCode = 124,
++ prtiScriptCode = 125,
++ samrScriptCode = 126,
++ tavtScriptCode = 127,
++ zmthScriptCode = 128,
++ zsymScriptCode = 129,
++/**
++ * @stable ICU 4.4
++ */
++
++ bamuScriptCode = 130,
++ lisuScriptCode = 131,
++ nkgbScriptCode = 132,
++ sarbScriptCode = 133,
++/**
++ * @stable ICU 4.6
++ */
++
++ bassScriptCode = 134,
++ duplScriptCode = 135,
++ elbaScriptCode = 136,
++ granScriptCode = 137,
++ kpelScriptCode = 138,
++ lomaScriptCode = 139,
++ mendScriptCode = 140,
++ mercScriptCode = 141,
++ narbScriptCode = 142,
++ nbatScriptCode = 143,
++ palmScriptCode = 144,
++ sindScriptCode = 145,
++ waraScriptCode = 146,
++/**
++ * @stable ICU 4.8
++ */
++
++ afakScriptCode = 147,
++ jurcScriptCode = 148,
++ mrooScriptCode = 149,
++ nshuScriptCode = 150,
++ shrdScriptCode = 151,
++ soraScriptCode = 152,
++ takrScriptCode = 153,
++ tangScriptCode = 154,
++ woleScriptCode = 155,
++/**
++ * @stable ICU 49
++ */
++
++ hluwScriptCode = 156, /* bump to match current ICU */
++ khojScriptCode = 157,
++ tirhScriptCode = 158,
++
++/**
++ * @stable ICU 2.2
++ */
++ scriptCodeCount /* no explicit value: takes tirhScriptCode + 1 = 159, the number of distinct codes above; keep it last */
++};
++
++U_NAMESPACE_END
++#endif
+diff --git a/jdk/src/share/native/sun/font/layout/LEStandalone.h b/jdk/src/share/native/common/LEStandalone.h
+similarity index 100%
+rename from jdk/src/share/native/sun/font/layout/LEStandalone.h
+rename to jdk/src/share/native/common/LEStandalone.h
+diff --git a/jdk/src/share/native/sun/font/layout/LESwaps.h b/jdk/src/share/native/common/LESwaps.h
+similarity index 100%
+rename from jdk/src/share/native/sun/font/layout/LESwaps.h
+rename to jdk/src/share/native/common/LESwaps.h
+diff --git a/jdk/src/share/native/sun/font/layout/LETableReference.h b/jdk/src/share/native/common/LETableReference.h
+similarity index 100%
+rename from jdk/src/share/native/sun/font/layout/LETableReference.h
+rename to jdk/src/share/native/common/LETableReference.h
+diff --git a/jdk/src/share/native/sun/font/layout/LETypes.h b/jdk/src/share/native/common/LETypes.h
+similarity index 100%
+rename from jdk/src/share/native/sun/font/layout/LETypes.h
+rename to jdk/src/share/native/common/LETypes.h
+diff --git a/jdk/src/share/native/common/awt_ImagingLib.h b/jdk/src/share/native/common/awt_ImagingLib.h
+new file mode 100644
+index 0000000000..f0dfec5b2b
+--- /dev/null
++++ b/jdk/src/share/native/common/awt_ImagingLib.h
+@@ -0,0 +1,51 @@
++/*
++ * Copyright (c) 2000, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation. Oracle designates this
++ * particular file as subject to the "Classpath" exception as provided
++ * by Oracle in the LICENSE file that accompanied this code.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++#ifndef _AWT_IMAGINGLIB_H_
++#define _AWT_IMAGINGLIB_H_
++
++#include "mlib_types.h"
++#include "mlib_status.h"
++#include "mlib_image_types.h"
++#include "mlib_image_get.h"
++
++/* Struct that holds one mlib function pointer together with that function's name */
++typedef struct {
++ mlib_status (*fptr)(); /* returns mlib_status; the empty parameter list leaves the arguments unchecked in C */
++ char *fname; /* name paired with fptr; NOTE(review): presumably the symbol name used to resolve it - confirm */
++} mlibFnS_t;
++
++typedef mlib_image *(*MlibCreateFP_t)(mlib_type, mlib_s32, mlib_s32,
++ mlib_s32); /* returns mlib_image*; NOTE(review): presumably (type, channels, width, height) - confirm against mlib */
++typedef mlib_image *(*MlibCreateStructFP_t)(mlib_type, mlib_s32, mlib_s32,
++ mlib_s32, mlib_s32, void *); /* as above plus one mlib_s32 and a void*; NOTE(review): presumably stride and data - confirm */
++typedef void (*MlibDeleteFP_t)(mlib_image *); /* takes an mlib_image* and returns nothing */
++
++typedef struct { /* groups the three image-management function pointers declared above */
++ MlibCreateFP_t createFP; /* image-create entry point */
++ MlibCreateStructFP_t createStructFP; /* create-struct entry point (takes the extra mlib_s32 and void* arguments) */
++ MlibDeleteFP_t deleteImageFP; /* image-delete entry point */
++} mlibSysFnS_t;
++
++#endif /* _AWT_IMAGINGLIB_H_ */
+diff --git a/jdk/src/share/native/common/bionic_mbstate.h b/jdk/src/share/native/common/bionic_mbstate.h
+new file mode 100644
+index 0000000000..fce06f54eb
+--- /dev/null
++++ b/jdk/src/share/native/common/bionic_mbstate.h
+@@ -0,0 +1,77 @@
++/*
++ * Copyright (C) 2014 The Android Open Source Project
++ * All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * * Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * * Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in
++ * the documentation and/or other materials provided with the
++ * distribution.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
++ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
++ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
++ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
++ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ */
++#ifndef _BIONIC_MBSTATE_H
++#define _BIONIC_MBSTATE_H
++#include ++ * Line boundary analysis determines where a text string can be broken ++ * when line-wrapping. The mechanism correctly handles punctuation and ++ * hyphenated words. ++ *
++ * Sentence boundary analysis allows selection with correct ++ * interpretation of periods within numbers and abbreviations, and ++ * trailing punctuation marks such as quotation marks and parentheses. ++ *
++ * Word boundary analysis is used by search and replace functions, as ++ * well as within text editing applications that allow the user to ++ * select words with a double click. Word selection provides correct ++ * interpretation of punctuation marks within and following ++ * words. Characters that are not part of a word, such as symbols or ++ * punctuation marks, have word-breaks on both sides. ++ *
++ * Character boundary analysis allows users to interact with ++ * characters as they expect to, for example, when moving the cursor ++ * through a text string. Character boundary analysis provides correct ++ * navigation of through character strings, regardless of how the ++ * character is stored. For example, an accented character might be ++ * stored as a base character and a diacritical mark. What users ++ * consider to be a character can differ between languages. ++ *
++ * The text boundary positions are found according to the rules ++ * described in Unicode Standard Annex #29, Text Boundaries, and ++ * Unicode Standard Annex #14, Line Breaking Properties. These ++ * are available at http://www.unicode.org/reports/tr14/ and ++ * http://www.unicode.org/reports/tr29/. ++ *
++ * In addition to the C++ API defined in this header file, a ++ * plain C API with equivalent functionality is defined in the ++ * file ubrk.h ++ *
++ * Code snippets illustrating the use of the Break Iterator APIs ++ * are available in the ICU User Guide, ++ * http://icu-project.org/userguide/boundaryAnalysis.html ++ * and in the sample program icu/source/samples/break/break.cpp ++ * ++ */ ++class U_COMMON_API BreakIterator : public UObject { ++public: ++ /** ++ * destructor ++ * @stable ICU 2.0 ++ */ ++ virtual ~BreakIterator(); ++ ++ /** ++ * Return true if another object is semantically equal to this ++ * one. The other object should be an instance of the same subclass of ++ * BreakIterator. Objects of different subclasses are considered ++ * unequal. ++ *
++ * Return true if this BreakIterator is at the same position in the ++ * same text, and is the same class and type (word, line, etc.) of ++ * BreakIterator, as the argument. Text is considered the same if ++ * it contains the same characters, it need not be the same ++ * object, and styles are not considered. ++ * @stable ICU 2.0 ++ */ ++ virtual UBool operator==(const BreakIterator&) const = 0; ++ ++ /** ++ * Returns the complement of the result of operator== ++ * @param rhs The BreakIterator to be compared for inequality ++ * @return the complement of the result of operator== ++ * @stable ICU 2.0 ++ */ ++ UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } ++ ++ /** ++ * Return a polymorphic copy of this object. This is an abstract ++ * method which subclasses implement. ++ * @stable ICU 2.0 ++ */ ++ virtual BreakIterator* clone(void) const = 0; ++ ++ /** ++ * Return a polymorphic class ID for this object. Different subclasses ++ * will return distinct unequal values. ++ * @stable ICU 2.0 ++ */ ++ virtual UClassID getDynamicClassID(void) const = 0; ++ ++ /** ++ * Return a CharacterIterator over the text being analyzed. ++ * @stable ICU 2.0 ++ */ ++ virtual CharacterIterator& getText(void) const = 0; ++ ++ ++ /** ++ * Get a UText for the text being analyzed. ++ * The returned UText is a shallow clone of the UText used internally ++ * by the break iterator implementation. It can safely be used to ++ * access the text without impacting any break iterator operations, ++ * but the underlying text itself must not be altered. ++ * ++ * @param fillIn A UText to be filled in. If NULL, a new UText will be ++ * allocated to hold the result. ++ * @param status receives any error codes. ++ * @return The current UText for this break iterator. If an input ++ * UText was provided, it will always be returned. 
++ * @stable ICU 3.4 ++ */ ++ virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; ++ ++ /** ++ * Change the text over which this operates. The text boundary is ++ * reset to the start. ++ * @param text The UnicodeString used to change the text. ++ * @stable ICU 2.0 ++ */ ++ virtual void setText(const UnicodeString &text) = 0; ++ ++ /** ++ * Reset the break iterator to operate over the text represented by ++ * the UText. The iterator position is reset to the start. ++ * ++ * This function makes a shallow clone of the supplied UText. This means ++ * that the caller is free to immediately close or otherwise reuse the ++ * Utext that was passed as a parameter, but that the underlying text itself ++ * must not be altered while being referenced by the break iterator. ++ * ++ * All index positions returned by break iterator functions are ++ * native indices from the UText. For example, when breaking UTF-8 ++ * encoded text, the break positions returned by next(), previous(), etc. ++ * will be UTF-8 string indices, not UTF-16 positions. ++ * ++ * @param text The UText used to change the text. ++ * @param status receives any error codes. ++ * @stable ICU 3.4 ++ */ ++ virtual void setText(UText *text, UErrorCode &status) = 0; ++ ++ /** ++ * Change the text over which this operates. The text boundary is ++ * reset to the start. ++ * Note that setText(UText *) provides similar functionality to this function, ++ * and is more efficient. ++ * @param it The CharacterIterator used to change the text. ++ * @stable ICU 2.0 ++ */ ++ virtual void adoptText(CharacterIterator* it) = 0; ++ ++ enum { ++ /** ++ * DONE is returned by previous() and next() after all valid ++ * boundaries have been returned. ++ * @stable ICU 2.0 ++ */ ++ DONE = (int32_t)-1 ++ }; ++ ++ /** ++ * Sets the current iteration position to the beginning of the text, position zero. ++ * @return The offset of the beginning of the text, zero. 
++ * @stable ICU 2.0 ++ */ ++ virtual int32_t first(void) = 0; ++ ++ /** ++ * Set the iterator position to the index immediately BEYOND the last character in the text being scanned. ++ * @return The index immediately BEYOND the last character in the text being scanned. ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t last(void) = 0; ++ ++ /** ++ * Set the iterator position to the boundary preceding the current boundary. ++ * @return The character index of the previous text boundary or DONE if all ++ * boundaries have been returned. ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t previous(void) = 0; ++ ++ /** ++ * Advance the iterator to the boundary following the current boundary. ++ * @return The character index of the next text boundary or DONE if all ++ * boundaries have been returned. ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t next(void) = 0; ++ ++ /** ++ * Return character index of the current interator position within the text. ++ * @return The boundary most recently returned. ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t current(void) const = 0; ++ ++ /** ++ * Advance the iterator to the first boundary following the specified offset. ++ * The value returned is always greater than the offset or ++ * the value BreakIterator.DONE ++ * @param offset the offset to begin scanning. ++ * @return The first boundary after the specified offset. ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t following(int32_t offset) = 0; ++ ++ /** ++ * Set the iterator position to the first boundary preceding the specified offset. ++ * The value returned is always smaller than the offset or ++ * the value BreakIterator.DONE ++ * @param offset the offset to begin scanning. ++ * @return The first boundary before the specified offset. ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t preceding(int32_t offset) = 0; ++ ++ /** ++ * Return true if the specfied position is a boundary position. 
++ * As a side effect, the current position of the iterator is set ++ * to the first boundary position at or following the specified offset. ++ * @param offset the offset to check. ++ * @return True if "offset" is a boundary position. ++ * @stable ICU 2.0 ++ */ ++ virtual UBool isBoundary(int32_t offset) = 0; ++ ++ /** ++ * Set the iterator position to the nth boundary from the current boundary ++ * @param n the number of boundaries to move by. A value of 0 ++ * does nothing. Negative values move to previous boundaries ++ * and positive values move to later boundaries. ++ * @return The new iterator position, or ++ * DONE if there are fewer than |n| boundaries in the specfied direction. ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t next(int32_t n) = 0; ++ ++ /** ++ * For RuleBasedBreakIterators, return the status tag from the ++ * break rule that determined the most recently ++ * returned break position. ++ *
++ * For break iterator types that do not support a rule status, ++ * a default value of 0 is returned. ++ *
++ * @return the status from the break rule that determined the most recently ++ * returned break position. ++ * @see RuleBaseBreakIterator::getRuleStatus() ++ * @see UWordBreak ++ * @stable ICU 52 ++ */ ++ virtual int32_t getRuleStatus() const; ++ ++ /** ++ * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) ++ * that determined the most recently returned break position. ++ *
++ * For break iterator types that do not support rule status, ++ * no values are returned. ++ *
++ * The returned status value(s) are stored into an array provided by the caller. ++ * The values are stored in sorted (ascending) order. ++ * If the capacity of the output array is insufficient to hold the data, ++ * the output will be truncated to the available length, and a ++ * U_BUFFER_OVERFLOW_ERROR will be signaled. ++ *
++ * @see RuleBaseBreakIterator::getRuleStatusVec ++ * ++ * @param fillInVec an array to be filled in with the status values. ++ * @param capacity the length of the supplied vector. A length of zero causes ++ * the function to return the number of status values, in the ++ * normal way, without attemtping to store any values. ++ * @param status receives error codes. ++ * @return The number of rule status values from rules that determined ++ * the most recent boundary returned by the break iterator. ++ * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value ++ * is the total number of status values that were available, ++ * not the reduced number that were actually returned. ++ * @see getRuleStatus ++ * @stable ICU 52 ++ */ ++ virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); ++ ++ /** ++ * Create BreakIterator for word-breaks using the given locale. ++ * Returns an instance of a BreakIterator implementing word breaks. ++ * WordBreak is useful for word selection (ex. double click) ++ * @param where the locale. ++ * @param status the error code ++ * @return A BreakIterator for word-breaks. The UErrorCode& status ++ * parameter is used to return status information to the user. ++ * To check whether the construction succeeded or not, you should check ++ * the value of U_SUCCESS(err). If you wish more detailed information, you ++ * can check for informational error results which still indicate success. ++ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For ++ * example, 'de_CH' was requested, but nothing was found there, so 'de' was ++ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was ++ * used; neither the requested locale nor any of its fall back locales ++ * could be found. ++ * The caller owns the returned object and is responsible for deleting it. 
++ * @stable ICU 2.0 ++ */ ++ static BreakIterator* U_EXPORT2 ++ createWordInstance(const Locale& where, UErrorCode& status); ++ ++ /** ++ * Create BreakIterator for line-breaks using specified locale. ++ * Returns an instance of a BreakIterator implementing line breaks. Line ++ * breaks are logically possible line breaks, actual line breaks are ++ * usually determined based on display width. ++ * LineBreak is useful for word wrapping text. ++ * @param where the locale. ++ * @param status The error code. ++ * @return A BreakIterator for line-breaks. The UErrorCode& status ++ * parameter is used to return status information to the user. ++ * To check whether the construction succeeded or not, you should check ++ * the value of U_SUCCESS(err). If you wish more detailed information, you ++ * can check for informational error results which still indicate success. ++ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For ++ * example, 'de_CH' was requested, but nothing was found there, so 'de' was ++ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was ++ * used; neither the requested locale nor any of its fall back locales ++ * could be found. ++ * The caller owns the returned object and is responsible for deleting it. ++ * @stable ICU 2.0 ++ */ ++ static BreakIterator* U_EXPORT2 ++ createLineInstance(const Locale& where, UErrorCode& status); ++ ++ /** ++ * Create BreakIterator for character-breaks using specified locale ++ * Returns an instance of a BreakIterator implementing character breaks. ++ * Character breaks are boundaries of combining character sequences. ++ * @param where the locale. ++ * @param status The error code. ++ * @return A BreakIterator for character-breaks. The UErrorCode& status ++ * parameter is used to return status information to the user. ++ * To check whether the construction succeeded or not, you should check ++ * the value of U_SUCCESS(err). 
If you wish more detailed information, you ++ * can check for informational error results which still indicate success. ++ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For ++ * example, 'de_CH' was requested, but nothing was found there, so 'de' was ++ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was ++ * used; neither the requested locale nor any of its fall back locales ++ * could be found. ++ * The caller owns the returned object and is responsible for deleting it. ++ * @stable ICU 2.0 ++ */ ++ static BreakIterator* U_EXPORT2 ++ createCharacterInstance(const Locale& where, UErrorCode& status); ++ ++ /** ++ * Create BreakIterator for sentence-breaks using specified locale ++ * Returns an instance of a BreakIterator implementing sentence breaks. ++ * @param where the locale. ++ * @param status The error code. ++ * @return A BreakIterator for sentence-breaks. The UErrorCode& status ++ * parameter is used to return status information to the user. ++ * To check whether the construction succeeded or not, you should check ++ * the value of U_SUCCESS(err). If you wish more detailed information, you ++ * can check for informational error results which still indicate success. ++ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For ++ * example, 'de_CH' was requested, but nothing was found there, so 'de' was ++ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was ++ * used; neither the requested locale nor any of its fall back locales ++ * could be found. ++ * The caller owns the returned object and is responsible for deleting it. ++ * @stable ICU 2.0 ++ */ ++ static BreakIterator* U_EXPORT2 ++ createSentenceInstance(const Locale& where, UErrorCode& status); ++ ++ /** ++ * Create BreakIterator for title-casing breaks using the specified locale ++ * Returns an instance of a BreakIterator implementing title breaks. 
++ * The iterator returned locates title boundaries as described for ++ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, ++ * please use Word Boundary iterator.{@link #createWordInstance } ++ * ++ * @param where the locale. ++ * @param status The error code. ++ * @return A BreakIterator for title-breaks. The UErrorCode& status ++ * parameter is used to return status information to the user. ++ * To check whether the construction succeeded or not, you should check ++ * the value of U_SUCCESS(err). If you wish more detailed information, you ++ * can check for informational error results which still indicate success. ++ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For ++ * example, 'de_CH' was requested, but nothing was found there, so 'de' was ++ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was ++ * used; neither the requested locale nor any of its fall back locales ++ * could be found. ++ * The caller owns the returned object and is responsible for deleting it. ++ * @stable ICU 2.1 ++ */ ++ static BreakIterator* U_EXPORT2 ++ createTitleInstance(const Locale& where, UErrorCode& status); ++ ++ /** ++ * Get the set of Locales for which TextBoundaries are installed. ++ *
Note: this will not return locales added through the register ++ * call. To see the registered locales too, use the getAvailableLocales ++ * function that returns a StringEnumeration object
++ * @param count the output parameter of number of elements in the locale list ++ * @return available locales ++ * @stable ICU 2.0 ++ */ ++ static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); ++ ++ /** ++ * Get name of the object for the desired Locale, in the desired langauge. ++ * @param objectLocale must be from getAvailableLocales. ++ * @param displayLocale specifies the desired locale for output. ++ * @param name the fill-in parameter of the return value ++ * Uses best match. ++ * @return user-displayable name ++ * @stable ICU 2.0 ++ */ ++ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, ++ const Locale& displayLocale, ++ UnicodeString& name); ++ ++ /** ++ * Get name of the object for the desired Locale, in the langauge of the ++ * default locale. ++ * @param objectLocale must be from getMatchingLocales ++ * @param name the fill-in parameter of the return value ++ * @return user-displayable name ++ * @stable ICU 2.0 ++ */ ++ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, ++ UnicodeString& name); ++ ++ /** ++ * Deprecated functionality. Use clone() instead. ++ * ++ * Thread safe client-buffer-based cloning operation ++ * Do NOT call delete on a safeclone, since 'new' is not used to create it. ++ * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. ++ * If buffer is not large enough, new memory will be allocated. ++ * @param BufferSize reference to size of allocated space. ++ * If BufferSize == 0, a sufficient size for use in cloning will ++ * be returned ('pre-flighting') ++ * If BufferSize is not enough for a stack-based safe clone, ++ * new memory will be allocated. ++ * @param status to indicate whether the operation went on smoothly or there were errors ++ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were ++ * necessary. ++ * @return pointer to the new clone ++ * ++ * @deprecated ICU 52. 
Use clone() instead. ++ */ ++ virtual BreakIterator * createBufferClone(void *stackBuffer, ++ int32_t &BufferSize, ++ UErrorCode &status) = 0; ++ ++#ifndef U_HIDE_DEPRECATED_API ++ ++ /** ++ * Determine whether the BreakIterator was created in user memory by ++ * createBufferClone(), and thus should not be deleted. Such objects ++ * must be closed by an explicit call to the destructor (not delete). ++ * @deprecated ICU 52. Always delete the BreakIterator. ++ */ ++ inline UBool isBufferClone(void); ++ ++#endif /* U_HIDE_DEPRECATED_API */ ++ ++#if !UCONFIG_NO_SERVICE ++ /** ++ * Register a new break iterator of the indicated kind, to use in the given locale. ++ * The break iterator will be adopted. Clones of the iterator will be returned ++ * if a request for a break iterator of the given kind matches or falls back to ++ * this locale. ++ * Because ICU may choose to cache BreakIterators internally, this must ++ * be called at application startup, prior to any calls to ++ * BreakIterator::createXXXInstance to avoid undefined behavior. ++ * @param toAdopt the BreakIterator instance to be adopted ++ * @param locale the Locale for which this instance is to be registered ++ * @param kind the type of iterator for which this instance is to be registered ++ * @param status the in/out status code, no special meanings are assigned ++ * @return a registry key that can be used to unregister this instance ++ * @stable ICU 2.4 ++ */ ++ static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt, ++ const Locale& locale, ++ UBreakIteratorType kind, ++ UErrorCode& status); ++ ++ /** ++ * Unregister a previously-registered BreakIterator using the key returned from the ++ * register call. Key becomes invalid after a successful call and should not be used again. ++ * The BreakIterator corresponding to the key will be deleted. 
++ * Because ICU may choose to cache BreakIterators internally, this should ++ * be called during application shutdown, after all calls to ++ * BreakIterator::createXXXInstance to avoid undefined behavior. ++ * @param key the registry key returned by a previous call to registerInstance ++ * @param status the in/out status code, no special meanings are assigned ++ * @return TRUE if the iterator for the key was successfully unregistered ++ * @stable ICU 2.4 ++ */ ++ static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); ++ ++ /** ++ * Return a StringEnumeration over the locales available at the time of the call, ++ * including registered locales. ++ * @return a StringEnumeration over the locales available at the time of the call ++ * @stable ICU 2.4 ++ */ ++ static StringEnumeration* U_EXPORT2 getAvailableLocales(void); ++#endif ++ ++ /** ++ * Returns the locale for this break iterator. Two flavors are available: valid and ++ * actual locale. ++ * @stable ICU 2.8 ++ */ ++ Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; ++ ++#ifndef U_HIDE_INTERNAL_API ++ /** Get the locale for this break iterator object. You can choose between valid and actual locale. ++ * @param type type of the locale we're looking for (valid or actual) ++ * @param status error code for the operation ++ * @return the locale ++ * @internal ++ */ ++ const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; ++#endif /* U_HIDE_INTERNAL_API */ ++ ++ /** ++ * Set the subject text string upon which the break iterator is operating ++ * without changing any other aspect of the matching state. ++ * The new and previous text strings must have the same content. ++ * ++ * This function is intended for use in environments where ICU is operating on ++ * strings that may move around in memory. It provides a mechanism for notifying ++ * ICU that the string has been relocated, and providing a new UText to access the ++ * string in its new position. 
++ * ++ * Note that the break iterator implementation never copies the underlying text ++ * of a string being processed, but always operates directly on the original text ++ * provided by the user. Refreshing simply drops the references to the old text ++ * and replaces them with references to the new. ++ * ++ * Caution: this function is normally used only by very specialized, ++ * system-level code. One example use case is with garbage collection that moves ++ * the text in memory. ++ * ++ * @param input The new (moved) text string. ++ * @param status Receives errors detected by this function. ++ * @return *this ++ * ++ * @stable ICU 49 ++ */ ++ virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; ++ ++ private: ++ static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status); ++ static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); ++ static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); ++ ++ friend class ICUBreakIteratorFactory; ++ friend class ICUBreakIteratorService; ++ ++protected: ++ // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API ++ // or else the compiler will create a public ones. ++ /** @internal */ ++ BreakIterator(); ++ /** @internal */ ++ BreakIterator (const BreakIterator &other) : UObject(other) {} ++#ifndef U_HIDE_INTERNAL_API ++ /** @internal */ ++ BreakIterator (const Locale& valid, const Locale& actual); ++#endif /* U_HIDE_INTERNAL_API */ ++ ++private: ++ ++ /** @internal */ ++ char actualLocale[ULOC_FULLNAME_CAPACITY]; ++ char validLocale[ULOC_FULLNAME_CAPACITY]; ++ ++ /** ++ * The assignment operator has no real implementation. ++ * It's provided to make the compiler happy. Do not call. 
++ */ ++ BreakIterator& operator=(const BreakIterator&); ++}; ++ ++#ifndef U_HIDE_DEPRECATED_API ++ ++inline UBool BreakIterator::isBufferClone() ++{ ++ return FALSE; ++} ++ ++#endif /* U_HIDE_DEPRECATED_API */ ++ ++U_NAMESPACE_END ++ ++#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ ++ ++#endif // _BRKITER ++//eof +diff --git a/jdk/src/share/native/common/unicode/bytestream.h b/jdk/src/share/native/common/unicode/bytestream.h +new file mode 100644 +index 0000000000..174aa38afc +--- /dev/null ++++ b/jdk/src/share/native/common/unicode/bytestream.h +@@ -0,0 +1,257 @@ ++// Copyright (C) 2009-2012, International Business Machines ++// Corporation and others. All Rights Reserved. ++// ++// Copyright 2007 Google Inc. All Rights Reserved. ++// Author: sanjay@google.com (Sanjay Ghemawat) ++// ++// Abstract interface that consumes a sequence of bytes (ByteSink). ++// ++// Used so that we can write a single piece of code that can operate ++// on a variety of output string types. ++// ++// Various implementations of this interface are provided: ++// ByteSink: ++// CheckedArrayByteSink Write to a flat array, with bounds checking ++// StringByteSink Write to an STL string ++ ++// This code is a contribution of Google code, and the style used here is ++// a compromise between the original Google code and the ICU coding guidelines. ++// For example, data types are ICU-ified (size_t,int->int32_t), ++// and API comments doxygen-ified, but function names and behavior are ++// as in the original, if possible. ++// Assertion-style error handling, not available in ICU, was changed to ++// parameter "pinning" similar to UnicodeString. ++// ++// In addition, this is only a partial port of the original Google code, ++// limited to what was needed so far. The (nearly) complete original code ++// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib ++// (see ICU ticket 6765, r25517). 
++ ++#ifndef __BYTESTREAM_H__ ++#define __BYTESTREAM_H__ ++ ++/** ++ * \file ++ * \brief C++ API: Interface for writing bytes, and implementation classes. ++ */ ++ ++#include "unicode/utypes.h" ++#include "unicode/uobject.h" ++#include "unicode/std_string.h" ++ ++U_NAMESPACE_BEGIN ++ ++/** ++ * A ByteSink can be filled with bytes. ++ * @stable ICU 4.2 ++ */ ++class U_COMMON_API ByteSink : public UMemory { ++public: ++ /** ++ * Default constructor. ++ * @stable ICU 4.2 ++ */ ++ ByteSink() { } ++ /** ++ * Virtual destructor. ++ * @stable ICU 4.2 ++ */ ++ virtual ~ByteSink(); ++ ++ /** ++ * Append "bytes[0,n-1]" to this. ++ * @param bytes the pointer to the bytes ++ * @param n the number of bytes; must be non-negative ++ * @stable ICU 4.2 ++ */ ++ virtual void Append(const char* bytes, int32_t n) = 0; ++ ++ /** ++ * Returns a writable buffer for appending and writes the buffer's capacity to ++ * *result_capacity. Guarantees *result_capacity>=min_capacity. ++ * May return a pointer to the caller-owned scratch buffer which must have ++ * scratch_capacity>=min_capacity. ++ * The returned buffer is only valid until the next operation ++ * on this ByteSink. ++ * ++ * After writing at most *result_capacity bytes, call Append() with the ++ * pointer returned from this function and the number of bytes written. ++ * Many Append() implementations will avoid copying bytes if this function ++ * returned an internal buffer. ++ * ++ * Partial usage example: ++ * int32_t capacity; ++ * char* buffer = sink->GetAppendBuffer(..., &capacity); ++ * ... Write n bytes into buffer, with n <= capacity. ++ * sink->Append(buffer, n); ++ * In many implementations, that call to Append will avoid copying bytes. ++ * ++ * If the ByteSink allocates or reallocates an internal buffer, it should use ++ * the desired_capacity_hint if appropriate. ++ * If a caller cannot provide a reasonable guess at the desired capacity, ++ * it should pass desired_capacity_hint=0. 
++ * ++ * If a non-scratch buffer is returned, the caller may only pass ++ * a prefix to it to Append(). ++ * That is, it is not correct to pass an interior pointer to Append(). ++ * ++ * The default implementation always returns the scratch buffer. ++ * ++ * @param min_capacity required minimum capacity of the returned buffer; ++ * must be non-negative ++ * @param desired_capacity_hint desired capacity of the returned buffer; ++ * must be non-negative ++ * @param scratch default caller-owned buffer ++ * @param scratch_capacity capacity of the scratch buffer ++ * @param result_capacity pointer to an integer which will be set to the ++ * capacity of the returned buffer ++ * @return a buffer with *result_capacity>=min_capacity ++ * @stable ICU 4.2 ++ */ ++ virtual char* GetAppendBuffer(int32_t min_capacity, ++ int32_t desired_capacity_hint, ++ char* scratch, int32_t scratch_capacity, ++ int32_t* result_capacity); ++ ++ /** ++ * Flush internal buffers. ++ * Some byte sinks use internal buffers or provide buffering ++ * and require calling Flush() at the end of the stream. ++ * The ByteSink should be ready for further Append() calls after Flush(). ++ * The default implementation of Flush() does nothing. ++ * @stable ICU 4.2 ++ */ ++ virtual void Flush(); ++ ++private: ++ ByteSink(const ByteSink &); // copy constructor not implemented ++ ByteSink &operator=(const ByteSink &); // assignment operator not implemented ++}; ++ ++// ------------------------------------------------------------- ++// Some standard implementations ++ ++/** ++ * Implementation of ByteSink that writes to a flat byte array, ++ * with bounds-checking: ++ * This sink will not write more than capacity bytes to outbuf. ++ * If more than capacity bytes are Append()ed, then excess bytes are ignored, ++ * and Overflowed() will return true. ++ * Overflow does not cause a runtime error. 
++ * @stable ICU 4.2 ++ */ ++class U_COMMON_API CheckedArrayByteSink : public ByteSink { ++public: ++ /** ++ * Constructs a ByteSink that will write to outbuf[0..capacity-1]. ++ * @param outbuf buffer to write to ++ * @param capacity size of the buffer ++ * @stable ICU 4.2 ++ */ ++ CheckedArrayByteSink(char* outbuf, int32_t capacity); ++ /** ++ * Destructor. ++ * @stable ICU 4.2 ++ */ ++ virtual ~CheckedArrayByteSink(); ++ /** ++ * Returns the sink to its original state, without modifying the buffer. ++ * Useful for reusing both the buffer and the sink for multiple streams. ++ * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0 ++ * and Overflowed()=FALSE. ++ * @return *this ++ * @stable ICU 4.6 ++ */ ++ virtual CheckedArrayByteSink& Reset(); ++ /** ++ * Append "bytes[0,n-1]" to this. ++ * @param bytes the pointer to the bytes ++ * @param n the number of bytes; must be non-negative ++ * @stable ICU 4.2 ++ */ ++ virtual void Append(const char* bytes, int32_t n); ++ /** ++ * Returns a writable buffer for appending and writes the buffer's capacity to ++ * *result_capacity. For details see the base class documentation. ++ * @param min_capacity required minimum capacity of the returned buffer; ++ * must be non-negative ++ * @param desired_capacity_hint desired capacity of the returned buffer; ++ * must be non-negative ++ * @param scratch default caller-owned buffer ++ * @param scratch_capacity capacity of the scratch buffer ++ * @param result_capacity pointer to an integer which will be set to the ++ * capacity of the returned buffer ++ * @return a buffer with *result_capacity>=min_capacity ++ * @stable ICU 4.2 ++ */ ++ virtual char* GetAppendBuffer(int32_t min_capacity, ++ int32_t desired_capacity_hint, ++ char* scratch, int32_t scratch_capacity, ++ int32_t* result_capacity); ++ /** ++ * Returns the number of bytes actually written to the sink. 
++ * @return number of bytes written to the buffer ++ * @stable ICU 4.2 ++ */ ++ int32_t NumberOfBytesWritten() const { return size_; } ++ /** ++ * Returns true if any bytes were discarded, i.e., if there was an ++ * attempt to write more than 'capacity' bytes. ++ * @return TRUE if more than 'capacity' bytes were Append()ed ++ * @stable ICU 4.2 ++ */ ++ UBool Overflowed() const { return overflowed_; } ++ /** ++ * Returns the number of bytes appended to the sink. ++ * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten() ++ * else they return the same number. ++ * @return number of bytes written to the buffer ++ * @stable ICU 4.6 ++ */ ++ int32_t NumberOfBytesAppended() const { return appended_; } ++private: ++ char* outbuf_; ++ const int32_t capacity_; ++ int32_t size_; ++ int32_t appended_; ++ UBool overflowed_; ++ CheckedArrayByteSink(); ///< default constructor not implemented ++ CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented ++ CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented ++}; ++ ++#if U_HAVE_STD_STRING ++ ++/** ++ * Implementation of ByteSink that writes to a "string". ++ * The StringClass is usually instantiated with a std::string. ++ * @stable ICU 4.2 ++ */ ++templateCharacters can be accessed in two ways: as code units or as ++ * code points. ++ * Unicode code points are 21-bit integers and are the scalar values ++ * of Unicode characters. ICU uses the type UChar32 for them. ++ * Unicode code units are the storage units of a given ++ * Unicode/UCS Transformation Format (a character encoding scheme). ++ * With UTF-16, all code points can be represented with either one ++ * or two code units ("surrogates"). ++ * String storage is typically based on code units, while properties ++ * of characters are typically determined using code point values. 
++ * Some processes may be designed to work with sequences of code units, ++ * or it may be known that all characters that are important to an ++ * algorithm can be represented with single code units. ++ * Other processes will need to use the code point access functions.
++ * ++ *ForwardCharacterIterator provides nextPostInc() to access
++ * a code unit and advance an internal position into the text object,
++ * similar to a return text[position++].
++ * It provides next32PostInc() to access a code point and advance an internal
++ * position.
next32PostInc() assumes that the current position is that of ++ * the beginning of a code point, i.e., of its first code unit. ++ * After next32PostInc(), this will be true again. ++ * In general, access to code units and code points in the same ++ * iteration loop should not be mixed. In UTF-16, if the current position ++ * is on a second code unit (Low Surrogate), then only that code unit ++ * is returned even by next32PostInc().
++ * ++ *For iteration with either function, there are two ways to ++ * check for the end of the iteration. When there are no more ++ * characters in the text object: ++ *
Despite the fact that this function is public, ++ * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! ++ * @return a UClassID for this ForwardCharacterIterator ++ * @stable ICU 2.0 ++ */ ++ virtual UClassID getDynamicClassID(void) const = 0; ++ ++ /** ++ * Gets the current code unit for returning and advances to the next code unit ++ * in the iteration range ++ * (toward endIndex()). If there are ++ * no more code units to return, returns DONE. ++ * @return the current code unit. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar nextPostInc(void) = 0; ++ ++ /** ++ * Gets the current code point for returning and advances to the next code point ++ * in the iteration range ++ * (toward endIndex()). If there are ++ * no more code points to return, returns DONE. ++ * @return the current code point. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 next32PostInc(void) = 0; ++ ++ /** ++ * Returns FALSE if there are no more code units or code points ++ * at or after the current position in the iteration range. ++ * This is used with nextPostInc() or next32PostInc() in forward ++ * iteration. ++ * @returns FALSE if there are no more code units or code points ++ * at or after the current position in the iteration range. ++ * @stable ICU 2.0 ++ */ ++ virtual UBool hasNext() = 0; ++ ++protected: ++ /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/ ++ ForwardCharacterIterator(); ++ ++ /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/ ++ ForwardCharacterIterator(const ForwardCharacterIterator &other); ++ ++ /** ++ * Assignment operator to be overridden in the implementing class. ++ * @stable ICU 2.0 ++ */ ++ ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; } ++}; ++ ++/** ++ * Abstract class that defines an API for iteration ++ * on text objects. ++ * This is an interface for forward and backward iteration ++ * and random access into a text object. ++ * ++ *
The API provides backward compatibility to the Java and older ICU ++ * CharacterIterator classes but extends them significantly: ++ *
Examples for some of the new functions:
++ * ++ * Forward iteration with hasNext(): ++ * \code ++ * void forward1(CharacterIterator &it) { ++ * UChar32 c; ++ * for(it.setToStart(); it.hasNext();) { ++ * c=it.next32PostInc(); ++ * // use c ++ * } ++ * } ++ * \endcode ++ * Forward iteration more similar to loops with the old forward iteration, ++ * showing a way to convert simple for() loops: ++ * \code ++ * void forward2(CharacterIterator &it) { ++ * UChar c; ++ * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) { ++ * // use c ++ * } ++ * } ++ * \endcode ++ * Backward iteration with setToEnd() and hasPrevious(): ++ * \code ++ * void backward1(CharacterIterator &it) { ++ * UChar32 c; ++ * for(it.setToEnd(); it.hasPrevious();) { ++ * c=it.previous32(); ++ * // use c ++ * } ++ * } ++ * \endcode ++ * Backward iteration with a more traditional for() loop: ++ * \code ++ * void backward2(CharacterIterator &it) { ++ * UChar c; ++ * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) { ++ * // use c ++ * } ++ * } ++ * \endcode ++ * ++ * Example for random access: ++ * \code ++ * void random(CharacterIterator &it) { ++ * // set to the third code point from the beginning ++ * it.move32(3, CharacterIterator::kStart); ++ * // get a code point from here without moving the position ++ * UChar32 c=it.current32(); ++ * // get the position ++ * int32_t pos=it.getIndex(); ++ * // get the previous code unit ++ * UChar u=it.previous(); ++ * // move back one more code unit ++ * it.move(-1, CharacterIterator::kCurrent); ++ * // set the position back to where it was ++ * // and read the same code point c and move beyond it ++ * it.setIndex(pos); ++ * if(c!=it.next32PostInc()) { ++ * exit(1); // CharacterIterator inconsistent ++ * } ++ * } ++ * \endcode ++ * ++ *Examples, especially for the old API:
++ * ++ * Function processing characters, in this example simple output ++ *++ * \code ++ * void processChar( UChar c ) ++ * { ++ * cout << " " << c; ++ * } ++ * \endcode ++ *++ * Traverse the text from start to finish ++ *
++ * \code ++ * void traverseForward(CharacterIterator& iter) ++ * { ++ * for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) { ++ * processChar(c); ++ * } ++ * } ++ * \endcode ++ *++ * Traverse the text backwards, from end to start ++ *
++ * \code ++ * void traverseBackward(CharacterIterator& iter) ++ * { ++ * for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) { ++ * processChar(c); ++ * } ++ * } ++ * \endcode ++ *++ * Traverse both forward and backward from a given position in the text. ++ * The isLetter()/isDigit() checks in this example represent some additional stopping criteria. ++ *
++ * \code ++ * void traverseOut(CharacterIterator& iter, int32_t pos) ++ * { ++ * UChar c; ++ * for (c = iter.setIndex(pos); ++ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c)); ++ * c = iter.next()) {} ++ * int32_t end = iter.getIndex(); ++ * for (c = iter.setIndex(pos); ++ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c)); ++ * c = iter.previous()) {} ++ * int32_t start = iter.getIndex() + 1; ++ * ++ * cout << "start: " << start << " end: " << end << endl; ++ * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) { ++ * processChar(c); ++ * } ++ * } ++ * \endcode ++ *++ * Creating a StringCharacterIterator and calling the test functions ++ *
++ * \code ++ * void CharacterIterator_Example( void ) ++ * { ++ * cout << endl << "===== CharacterIterator_Example: =====" << endl; ++ * UnicodeString text("Ein kleiner Satz."); ++ * StringCharacterIterator iterator(text); ++ * cout << "----- traverseForward: -----------" << endl; ++ * traverseForward( iterator ); ++ * cout << endl << endl << "----- traverseBackward: ----------" << endl; ++ * traverseBackward( iterator ); ++ * cout << endl << endl << "----- traverseOut: ---------------" << endl; ++ * traverseOut( iterator, 7 ); ++ * cout << endl << endl << "-----" << endl; ++ * } ++ * \endcode ++ *++ * ++ * @stable ICU 2.0 ++ */ ++class U_COMMON_API CharacterIterator : public ForwardCharacterIterator { ++public: ++ /** ++ * Origin enumeration for the move() and move32() functions. ++ * @stable ICU 2.0 ++ */ ++ enum EOrigin { kStart, kCurrent, kEnd }; ++ ++ /** ++ * Destructor. ++ * @stable ICU 2.0 ++ */ ++ virtual ~CharacterIterator(); ++ ++ /** ++ * Returns a pointer to a new CharacterIterator of the same ++ * concrete class as this one, and referring to the same ++ * character in the same text-storage object as this one. The ++ * caller is responsible for deleting the new clone. ++ * @return a pointer to a new CharacterIterator ++ * @stable ICU 2.0 ++ */ ++ virtual CharacterIterator* clone(void) const = 0; ++ ++ /** ++ * Sets the iterator to refer to the first code unit in its ++ * iteration range, and returns that code unit. ++ * This can be used to begin an iteration with next(). ++ * @return the first code unit in its iteration range. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar first(void) = 0; ++ ++ /** ++ * Sets the iterator to refer to the first code unit in its ++ * iteration range, returns that code unit, and moves the position ++ * to the second code unit. This is an alternative to setToStart() ++ * for forward iteration with nextPostInc(). ++ * @return the first code unit in its iteration range. 
++ * @stable ICU 2.0 ++ */ ++ virtual UChar firstPostInc(void); ++ ++ /** ++ * Sets the iterator to refer to the first code point in its ++ * iteration range, and returns that code unit, ++ * This can be used to begin an iteration with next32(). ++ * Note that an iteration with next32PostInc(), beginning with, ++ * e.g., setToStart() or firstPostInc(), is more efficient. ++ * @return the first code point in its iteration range. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 first32(void) = 0; ++ ++ /** ++ * Sets the iterator to refer to the first code point in its ++ * iteration range, returns that code point, and moves the position ++ * to the second code point. This is an alternative to setToStart() ++ * for forward iteration with next32PostInc(). ++ * @return the first code point in its iteration range. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 first32PostInc(void); ++ ++ /** ++ * Sets the iterator to refer to the first code unit or code point in its ++ * iteration range. This can be used to begin a forward ++ * iteration with nextPostInc() or next32PostInc(). ++ * @return the start position of the iteration range ++ * @stable ICU 2.0 ++ */ ++ inline int32_t setToStart(); ++ ++ /** ++ * Sets the iterator to refer to the last code unit in its ++ * iteration range, and returns that code unit. ++ * This can be used to begin an iteration with previous(). ++ * @return the last code unit. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar last(void) = 0; ++ ++ /** ++ * Sets the iterator to refer to the last code point in its ++ * iteration range, and returns that code unit. ++ * This can be used to begin an iteration with previous32(). ++ * @return the last code point. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 last32(void) = 0; ++ ++ /** ++ * Sets the iterator to the end of its iteration range, just behind ++ * the last code unit or code point. This can be used to begin a backward ++ * iteration with previous() or previous32(). 
++ * @return the end position of the iteration range ++ * @stable ICU 2.0 ++ */ ++ inline int32_t setToEnd(); ++ ++ /** ++ * Sets the iterator to refer to the "position"-th code unit ++ * in the text-storage object the iterator refers to, and ++ * returns that code unit. ++ * @param position the "position"-th code unit in the text-storage object ++ * @return the "position"-th code unit. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar setIndex(int32_t position) = 0; ++ ++ /** ++ * Sets the iterator to refer to the beginning of the code point ++ * that contains the "position"-th code unit ++ * in the text-storage object the iterator refers to, and ++ * returns that code point. ++ * The current position is adjusted to the beginning of the code point ++ * (its first code unit). ++ * @param position the "position"-th code unit in the text-storage object ++ * @return the "position"-th code point. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 setIndex32(int32_t position) = 0; ++ ++ /** ++ * Returns the code unit the iterator currently refers to. ++ * @return the current code unit. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar current(void) const = 0; ++ ++ /** ++ * Returns the code point the iterator currently refers to. ++ * @return the current code point. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 current32(void) const = 0; ++ ++ /** ++ * Advances to the next code unit in the iteration range ++ * (toward endIndex()), and returns that code unit. If there are ++ * no more code units to return, returns DONE. ++ * @return the next code unit. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar next(void) = 0; ++ ++ /** ++ * Advances to the next code point in the iteration range ++ * (toward endIndex()), and returns that code point. If there are ++ * no more code points to return, returns DONE. ++ * Note that iteration with "pre-increment" semantics is less ++ * efficient than iteration with "post-increment" semantics ++ * that is provided by next32PostInc(). 
++ * @return the next code point. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 next32(void) = 0; ++ ++ /** ++ * Advances to the previous code unit in the iteration range ++ * (toward startIndex()), and returns that code unit. If there are ++ * no more code units to return, returns DONE. ++ * @return the previous code unit. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar previous(void) = 0; ++ ++ /** ++ * Advances to the previous code point in the iteration range ++ * (toward startIndex()), and returns that code point. If there are ++ * no more code points to return, returns DONE. ++ * @return the previous code point. ++ * @stable ICU 2.0 ++ */ ++ virtual UChar32 previous32(void) = 0; ++ ++ /** ++ * Returns FALSE if there are no more code units or code points ++ * before the current position in the iteration range. ++ * This is used with previous() or previous32() in backward ++ * iteration. ++ * @return FALSE if there are no more code units or code points ++ * before the current position in the iteration range, return TRUE otherwise. ++ * @stable ICU 2.0 ++ */ ++ virtual UBool hasPrevious() = 0; ++ ++ /** ++ * Returns the numeric index in the underlying text-storage ++ * object of the character returned by first(). Since it's ++ * possible to create an iterator that iterates across only ++ * part of a text-storage object, this number isn't ++ * necessarily 0. ++ * @returns the numeric index in the underlying text-storage ++ * object of the character returned by first(). ++ * @stable ICU 2.0 ++ */ ++ inline int32_t startIndex(void) const; ++ ++ /** ++ * Returns the numeric index in the underlying text-storage ++ * object of the position immediately BEYOND the character ++ * returned by last(). ++ * @return the numeric index in the underlying text-storage ++ * object of the position immediately BEYOND the character ++ * returned by last(). 
++ * @stable ICU 2.0 ++ */ ++ inline int32_t endIndex(void) const; ++ ++ /** ++ * Returns the numeric index in the underlying text-storage ++ * object of the character the iterator currently refers to ++ * (i.e., the character returned by current()). ++ * @return the numberic index in the text-storage object of ++ * the character the iterator currently refers to ++ * @stable ICU 2.0 ++ */ ++ inline int32_t getIndex(void) const; ++ ++ /** ++ * Returns the length of the entire text in the underlying ++ * text-storage object. ++ * @return the length of the entire text in the text-storage object ++ * @stable ICU 2.0 ++ */ ++ inline int32_t getLength() const; ++ ++ /** ++ * Moves the current position relative to the start or end of the ++ * iteration range, or relative to the current position itself. ++ * The movement is expressed in numbers of code units forward ++ * or backward by specifying a positive or negative delta. ++ * @param delta the position relative to origin. A positive delta means forward; ++ * a negative delta means backward. ++ * @param origin Origin enumeration {kStart, kCurrent, kEnd} ++ * @return the new position ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t move(int32_t delta, EOrigin origin) = 0; ++ ++ /** ++ * Moves the current position relative to the start or end of the ++ * iteration range, or relative to the current position itself. ++ * The movement is expressed in numbers of code points forward ++ * or backward by specifying a positive or negative delta. ++ * @param delta the position relative to origin. A positive delta means forward; ++ * a negative delta means backward. ++ * @param origin Origin enumeration {kStart, kCurrent, kEnd} ++ * @return the new position ++ * @stable ICU 2.0 ++ */ ++ virtual int32_t move32(int32_t delta, EOrigin origin) = 0; ++ ++ /** ++ * Copies the text under iteration into the UnicodeString ++ * referred to by "result". ++ * @param result Receives a copy of the text under iteration. 
++ * @stable ICU 2.0 ++ */ ++ virtual void getText(UnicodeString& result) = 0; ++ ++protected: ++ /** ++ * Empty constructor. ++ * @stable ICU 2.0 ++ */ ++ CharacterIterator(); ++ ++ /** ++ * Constructor, just setting the length field in this base class. ++ * @stable ICU 2.0 ++ */ ++ CharacterIterator(int32_t length); ++ ++ /** ++ * Constructor, just setting the length and position fields in this base class. ++ * @stable ICU 2.0 ++ */ ++ CharacterIterator(int32_t length, int32_t position); ++ ++ /** ++ * Constructor, just setting the length, start, end, and position fields in this base class. ++ * @stable ICU 2.0 ++ */ ++ CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position); ++ ++ /** ++ * Copy constructor. ++ * ++ * @param that The CharacterIterator to be copied ++ * @stable ICU 2.0 ++ */ ++ CharacterIterator(const CharacterIterator &that); ++ ++ /** ++ * Assignment operator. Sets this CharacterIterator to have the same behavior, ++ * as the one passed in. ++ * @param that The CharacterIterator passed in. ++ * @return the newly set CharacterIterator. ++ * @stable ICU 2.0 ++ */ ++ CharacterIterator &operator=(const CharacterIterator &that); ++ ++ /** ++ * Base class text length field. ++ * Necessary this for correct getText() and hashCode(). ++ * @stable ICU 2.0 ++ */ ++ int32_t textLength; ++ ++ /** ++ * Base class field for the current position. ++ * @stable ICU 2.0 ++ */ ++ int32_t pos; ++ ++ /** ++ * Base class field for the start of the iteration range. ++ * @stable ICU 2.0 ++ */ ++ int32_t begin; ++ ++ /** ++ * Base class field for the end of the iteration range. 
++ * @stable ICU 2.0 ++ */ ++ int32_t end; ++}; ++ ++inline UBool ++ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const { ++ return !operator==(that); ++} ++ ++inline int32_t ++CharacterIterator::setToStart() { ++ return move(0, kStart); ++} ++ ++inline int32_t ++CharacterIterator::setToEnd() { ++ return move(0, kEnd); ++} ++ ++inline int32_t ++CharacterIterator::startIndex(void) const { ++ return begin; ++} ++ ++inline int32_t ++CharacterIterator::endIndex(void) const { ++ return end; ++} ++ ++inline int32_t ++CharacterIterator::getIndex(void) const { ++ return pos; ++} ++ ++inline int32_t ++CharacterIterator::getLength(void) const { ++ return textLength; ++} ++ ++U_NAMESPACE_END ++#endif +diff --git a/jdk/src/share/native/common/unicode/dbbi.h b/jdk/src/share/native/common/unicode/dbbi.h +new file mode 100644 +index 0000000000..7187c3c20f +--- /dev/null ++++ b/jdk/src/share/native/common/unicode/dbbi.h +@@ -0,0 +1,40 @@ ++/* ++********************************************************************** ++* Copyright (C) 1999-2006,2013 IBM Corp. All rights reserved. ++********************************************************************** ++* Date Name Description ++* 12/1/99 rgillam Complete port from Java. ++* 01/13/2000 helena Added UErrorCode to ctors. ++********************************************************************** ++*/ ++ ++#ifndef DBBI_H ++#define DBBI_H ++ ++#include "unicode/rbbi.h" ++ ++#if !UCONFIG_NO_BREAK_ITERATION ++ ++/** ++ * \file ++ * \brief C++ API: Dictionary Based Break Iterator ++ */ ++ ++U_NAMESPACE_BEGIN ++ ++#ifndef U_HIDE_DEPRECATED_API ++/** ++ * An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary- ++ * based break iteration has been folded into the base class. This class ++ * is deprecated as of ICU 3.6. 
++ * @deprecated ICU 3.6 ++ */ ++typedef RuleBasedBreakIterator DictionaryBasedBreakIterator; ++ ++#endif /* U_HIDE_DEPRECATED_API */ ++ ++U_NAMESPACE_END ++ ++#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ ++ ++#endif +diff --git a/jdk/src/share/native/common/unicode/docmain.h b/jdk/src/share/native/common/unicode/docmain.h +new file mode 100644 +index 0000000000..df3fe842aa +--- /dev/null ++++ b/jdk/src/share/native/common/unicode/docmain.h +@@ -0,0 +1,215 @@ ++/******************************************************************** ++ * COPYRIGHT: ++ * Copyright (c) 1997-2012, International Business Machines Corporation and ++ * others. All Rights Reserved. ++ * ++ * FILE NAME: DOCMAIN.h ++ * ++ * Date Name Description ++ * 12/11/2000 Ram Creation. ++ */ ++ ++/** ++ * \file ++ * \brief (Non API- contains Doxygen definitions) ++ * ++ * This file contains documentation for Doxygen and doesnot have ++ * any significance with respect to C or C++ API ++ */ ++ ++/*! \mainpage ++ * ++ * \section API API Reference Usage ++ * ++ *
Use Class Hierarchy or Alphabetical List ++ * or Compound List ++ * to find the class you are interested in. For example, to find BreakIterator, ++ * you can go to the Alphabetical List, then click on ++ * "BreakIterator". Once you are at the class, you will find an inheritance ++ * chart, a list of the public members, a detailed description of the class, ++ * then detailed member descriptions.
++ * ++ *Use Module List or File Members ++ * to find a list of all the functions and constants. ++ * For example, to find BreakIterator functions you would click on ++ * File List, ++ * then find "ubrk.h" and click on it. You will find descriptions of Defines, ++ * Typedefs, Enumerations, and Functions, with detailed descriptions below. ++ * If you want to find a specific function, such as ubrk_next(), then click ++ * first on File Members, then use your browser ++ * Find dialog to search for "ubrk_next()".
++ * ++ * ++ *The API References for each release of ICU are also available as ++ * a zip file from the ICU ++ * download page.
++ * ++ *Module Name | ++ *C | ++ *C++ | ++ *
Basic Types and Constants | ++ *utypes.h | ++ *utypes.h | ++ *
Strings and Character Iteration | ++ *ustring.h, utf8.h, utf16.h, UText, UCharIterator | ++ *icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink | ++ *
Unicode Character Properties and Names |
++ * uchar.h, uscript.h | ++ *C API | ++ *
Sets of Unicode Code Points and Strings | ++ *uset.h | ++ *icu::UnicodeSet | ++ *
Maps from Strings to Integer Values | ++ *(no C API) | ++ *icu::BytesTrie, icu::UCharsTrie | ++ *
Codepage Conversion | ++ *ucnv.h, ucnvsel.h | ++ *C API | ++ *
Unicode Text Compression | ++ *ucnv.h (encoding name "SCSU" or "BOCU-1") |
++ * C API | ++ *
Locales | ++ *uloc.h | ++ *icu::Locale | ++ *
Resource Bundles | ++ *ures.h | ++ *icu::ResourceBundle | ++ *
Normalization | ++ *unorm2.h | ++ *icu::Normalizer2 | ++ *
Calendars | ++ *ucal.h | ++ *icu::Calendar | ++ *
Date and Time Formatting | ++ *udat.h | ++ *icu::DateFormat | ++ *
Message Formatting | ++ *umsg.h | ++ *icu::MessageFormat | ++ *
Number Formatting | ++ *unum.h | ++ *icu::NumberFormat | ++ *
Number Spellout (Rule Based Number Formatting) |
++ * unum.h (use UNUM_SPELLOUT) |
++ * icu::RuleBasedNumberFormat | ++ *
Text Transformation (Transliteration) |
++ * utrans.h | ++ *icu::Transliterator | ++ *
Bidirectional Algorithm | ++ *ubidi.h | ++ *C API | ++ *
Arabic Shaping | ++ *ushape.h | ++ *C API | ++ *
Collation | ++ *ucol.h | ++ *icu::Collator | ++ *
String Searching | ++ *usearch.h | ++ *icu::StringSearch | ++ *
Index Characters/ Bucketing for Sorted Lists |
++ * (no C API) | ++ *icu::AlphabeticIndex | ++ *
Text Boundary Analysis (Break Iteration) |
++ * ubrk.h | ++ *icu::BreakIterator | ++ *
Regular Expressions | ++ *uregex.h | ++ *icu::RegexPattern, icu::RegexMatcher | ++ *
StringPrep | ++ *usprep.h | ++ *C API | ++ *
International Domain Names in Applications: ++ * UTS #46 in C/C++, IDNA2003 only via C API |
++ * uidna.h | ++ *idna.h | ++ *
Identifier Spoofing & Confusability | ++ *uspoof.h | ++ *C API | ++ *
Universal Time Scale | ++ *utmscale.h | ++ *C API | ++ *
Layout Engine/Complex Text Layout | ++ *loengine.h | ++ *icu::LayoutEngine,icu::ParagraphLayout | ++ *
ICU I/O | ++ *ustdio.h | ++ *ustream.h | ++ *
++ * . Base* polymorphic_pointer = createPolymorphicObject(); ++ * . if (polymorphic_pointer->getDynamicClassID() == ++ * . erived::getStaticClassID()) ... ++ *++ * @return The class ID for all objects of this class. ++ * @stable ICU 4.0 ++ */ ++ static UClassID U_EXPORT2 getStaticClassID(void); ++ ++ /** ++ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This ++ * method is to implement a simple version of RTTI, since not all C++ ++ * compilers support genuine RTTI. Polymorphic operator==() and clone() ++ * methods call this method. ++ * ++ * @return The class ID for this object. All objects of a ++ * given class have the same class ID. Objects of ++ * other classes have different class IDs. ++ * @stable ICU 4.0 ++ */ ++ virtual UClassID getDynamicClassID(void) const; ++ ++ ++ /** ++ * Copy constructor. ++ * @stable ICU 4.0 ++ */ ++ DateInterval(const DateInterval& other); ++ ++ /** ++ * Default assignment operator ++ * @stable ICU 4.0 ++ */ ++ DateInterval& operator=(const DateInterval&); ++ ++ /** ++ * Equality operator. ++ * @return TRUE if the two DateIntervals are the same ++ * @stable ICU 4.0 ++ */ ++ virtual UBool operator==(const DateInterval& other) const; ++ ++ /** ++ * Non-equality operator ++ * @return TRUE if the two DateIntervals are not the same ++ * @stable ICU 4.0 ++ */ ++ UBool operator!=(const DateInterval& other) const; ++ ++ ++ /** ++ * clone this object. ++ * The caller owns the result and should delete it when done. ++ * @return a cloned DateInterval ++ * @stable ICU 4.0 ++ */ ++ virtual DateInterval* clone() const; ++ ++private: ++ /** ++ * Default constructor, not implemented. 
++ */ ++ DateInterval(); ++ ++ UDate fromDate; ++ UDate toDate; ++ ++} ;// end class DateInterval ++ ++ ++inline UDate ++DateInterval::getFromDate() const { ++ return fromDate; ++} ++ ++ ++inline UDate ++DateInterval::getToDate() const { ++ return toDate; ++} ++ ++ ++inline UBool ++DateInterval::operator!=(const DateInterval& other) const { ++ return ( !operator==(other) ); ++} ++ ++ ++U_NAMESPACE_END ++ ++#endif +diff --git a/jdk/src/share/native/common/unicode/edits.h b/jdk/src/share/native/common/unicode/edits.h +new file mode 100644 +index 0000000000..c3ceaccb3b +--- /dev/null ++++ b/jdk/src/share/native/common/unicode/edits.h +@@ -0,0 +1,531 @@ ++// © 2016 and later: Unicode, Inc. and others. ++// License & terms of use: http://www.unicode.org/copyright.html ++ ++// edits.h ++// created: 2016dec30 Markus W. Scherer ++ ++#ifndef __EDITS_H__ ++#define __EDITS_H__ ++ ++#include "unicode/utypes.h" ++ ++#if U_SHOW_CPLUSPLUS_API ++ ++#include "unicode/uobject.h" ++ ++/** ++ * \file ++ * \brief C++ API: C++ class Edits for low-level string transformations on styled text. ++ */ ++ ++U_NAMESPACE_BEGIN ++ ++class UnicodeString; ++ ++/** ++ * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions ++ * in linear progression. Does not support moving/reordering of text. ++ * ++ * There are two types of edits: change edits and no-change edits. Add edits to ++ * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and ++ * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity, ++ * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one ++ * mapping between code points in the source and destination strings. ++ * ++ * After all edits have been added, instances of this class should be considered immutable, and an ++ * {@link Edits::Iterator} can be used for queries. 
++ * ++ * There are four flavors of Edits::Iterator: ++ * ++ *
C API allowing run-time loadable modules that extend or modify ICU functionality.
++ * ++ *At ICU startup time, the environment variable "ICU_PLUGINS" will be ++ * queried for a directory name. If it is not set, the preprocessor symbol ++ * "DEFAULT_ICU_PLUGINS" will be checked for a default value.
++ * ++ *Within the above-named directory, the file "icuplugins##.txt" will be ++ * opened, if present, where ## is the major+minor number of the currently ++ * running ICU (such as, 44 for ICU 4.4, thus icuplugins44.txt)
++ * ++ *The configuration file has this format:
++ * ++ *An example configuration file is, in its entirety:
++ * ++ * \code ++ * # this is icuplugins44.txt ++ * testplug.dll myPlugin hello=world ++ * \endcode ++ *Plugins are categorized as "high" or "low" level. Low level are those ++ * which must be run BEFORE high level plugins, and before any operations ++ * which cause ICU to be 'initialized'. If a plugin is low level but ++ * causes ICU to allocate memory or become initialized, that plugin is said ++ * to cause a 'level change'.
++ * ++ *At load time, ICU first queries all plugins to determine their level, ++ * then loads all 'low' plugins first, and then loads all 'high' plugins. ++ * Plugins are otherwise loaded in the order listed in the configuration file.
++ * ++ *The UPlugData* is an opaque pointer to the plugin-specific data, and is ++ * used in all other API calls.
++ * ++ *The API contract is:
++ *LocaleBuilder
is used to build instances of Locale
++ * from values configured by the setters. Unlike the Locale
++ * constructors, the LocaleBuilder
checks if a value configured by a
++ * setter satisfies the syntax requirements defined by the Locale
++ * class. A Locale
object created by a LocaleBuilder
is
++ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
++ * without losing information.
++ *
++ * The following example shows how to create a Locale
object
++ * with the LocaleBuilder
.
++ *
++ *++ * ++ *++ * UErrorCode status = U_ZERO_ERROR; ++ * Locale aLocale = LocaleBuilder() ++ * .setLanguage("sr") ++ * .setScript("Latn") ++ * .setRegion("RS") ++ * .build(status); ++ * if (U_SUCCESS(status)) { ++ * // ... ++ * } ++ *++ *
LocaleBuilders can be reused; clear()
resets all
++ * fields to their default values.
++ *
++ *
LocaleBuilder tracks errors in an internal UErrorCode. For all setters,
++ * except setLanguageTag and setLocale, LocaleBuilder will return immediately
++ * if the internal UErrorCode is in error state.
++ * To reset internal state and error code, call clear method.
++ * The setLanguageTag and setLocale method will first clear the internal
++ * UErrorCode, then track the error of the validation of the input parameter
++ * into the internal UErrorCode.
++ *
++ * @draft ICU 64
++ */
++class U_COMMON_API LocaleBuilder : public UObject {
++public:
++ /**
++ * Constructs an empty LocaleBuilder. The default value of all
++ * fields, extensions, and private use information is the
++ * empty string.
++ *
++ * @draft ICU 64
++ */
++ LocaleBuilder();
++
++ /**
++ * Destructor
++ * @draft ICU 64
++ */
++ virtual ~LocaleBuilder();
++
++ /**
++ * Resets the LocaleBuilder
to match the provided
++ * locale
. Existing state is discarded.
++ *
++ *
All fields of the locale must be well-formed. ++ *
This method clears the internal UErrorCode.
++ *
++ * @param locale the locale
++ * @return This builder.
++ *
++ * @draft ICU 64
++ */
++ LocaleBuilder& setLocale(const Locale& locale);
++
++ /**
++ * Resets the LocaleBuilder to match the provided
++ * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
++ * Discards the existing state. The empty string causes the builder to be
++ * reset, like {@link #clear}. Grandfathered tags are converted to their
++ * canonical form before being processed. Otherwise, the language
++ * tag
must be well-formed, or else the build() method will later
++ * report an U_ILLEGAL_ARGUMENT_ERROR.
++ *
++ *
This method clears the internal UErrorCode.
++ *
++ * @param tag the language tag, defined as
++ * [unicode_locale_id](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id).
++ * @return This builder.
++ * @draft ICU 64
++ */
++ LocaleBuilder& setLanguageTag(StringPiece tag);
++
++ /**
++ * Sets the language. If language
is the empty string, the
++ * language in this LocaleBuilder
is removed. Otherwise, the
++ * language
must be well-formed, or else the build() method will
++ * later report an U_ILLEGAL_ARGUMENT_ERROR.
++ *
++ *
The syntax of language value is defined as
++ * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
++ *
++ * @param language the language
++ * @return This builder.
++ * @draft ICU 64
++ */
++ LocaleBuilder& setLanguage(StringPiece language);
++
++ /**
++ * Sets the script. If script
is the empty string, the script in
++ * this LocaleBuilder
is removed.
++ * Otherwise, the script
must be well-formed, or else the build()
++ * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
++ *
++ *
The script value is a four-letter script code as
++ * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
++ * defined by ISO 15924
++ *
++ * @param script the script
++ * @return This builder.
++ * @draft ICU 64
++ */
++ LocaleBuilder& setScript(StringPiece script);
++
++ /**
++ * Sets the region. If region is the empty string, the region in this
++ * LocaleBuilder
is removed. Otherwise, the region
++ * must be well-formed, or else the build() method will later report an
++ * U_ILLEGAL_ARGUMENT_ERROR.
++ *
++ *
The region value is defined by ++ * [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag) ++ * as a two-letter ISO 3166 code or a three-digit UN M.49 area code. ++ * ++ *
The region value in the Locale
created by the
++ * LocaleBuilder
is always normalized to upper case.
++ *
++ * @param region the region
++ * @return This builder.
++ * @draft ICU 64
++ */
++ LocaleBuilder& setRegion(StringPiece region);
++
++ /**
++ * Sets the variant. If variant is the empty string, the variant in this
++ * LocaleBuilder
is removed. Otherwise, the variant
++ * must be well-formed, or else the build() method will later report an
++ * U_ILLEGAL_ARGUMENT_ERROR.
++ *
++ *
Note: This method checks if variant
++ * satisfies the
++ * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
++ * syntax requirements, and normalizes the value to lowercase letters. However,
++ * the Locale
class does not impose any syntactic
++ * restriction on variant. To set an ill-formed variant, use a Locale constructor.
++ * If there are multiple unicode_variant_subtag, the caller must concatenate
++ * them with '-' as separator (ex: "foobar-fibar").
++ *
++ * @param variant the variant
++ * @return This builder.
++ * @draft ICU 64
++ */
++ LocaleBuilder& setVariant(StringPiece variant);
++
++ /**
++ * Sets the extension for the given key. If the value is the empty string,
++ * the extension is removed. Otherwise, the key
and
++ * value
must be well-formed, or else the build() method will
++ * later report an U_ILLEGAL_ARGUMENT_ERROR.
++ *
++ *
Note: The key ('u') is used for the Unicode locale extension. ++ * Setting a value for this key replaces any existing Unicode locale key/type ++ * pairs with those defined in the extension. ++ * ++ *
Note: The key ('x') is used for the private use code. To be ++ * well-formed, the value for this key needs only to have subtags of one to ++ * eight alphanumeric characters, not two to eight as in the general case. ++ * ++ * @param key the extension key ++ * @param value the extension value ++ * @return This builder. ++ * @draft ICU 64 ++ */ ++ LocaleBuilder& setExtension(char key, StringPiece value); ++ ++ /** ++ * Sets the Unicode locale keyword type for the given key. If the type ++ * StringPiece is constructed with a nullptr, the keyword is removed. ++ * If the type is the empty string, the keyword is set without type subtags. ++ * Otherwise, the key and type must be well-formed, or else the build() ++ * method will later report an U_ILLEGAL_ARGUMENT_ERROR. ++ * ++ *
Keys and types are converted to lower case. ++ * ++ *
Note:Setting the 'u' extension via {@link #setExtension} ++ * replaces all Unicode locale keywords with those defined in the ++ * extension. ++ * ++ * @param key the Unicode locale key ++ * @param type the Unicode locale type ++ * @return This builder. ++ * @draft ICU 64 ++ */ ++ LocaleBuilder& setUnicodeLocaleKeyword( ++ StringPiece key, StringPiece type); ++ ++ /** ++ * Adds a unicode locale attribute, if not already present, otherwise ++ * has no effect. The attribute must not be empty string and must be ++ * well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status ++ * during the build() call. ++ * ++ * @param attribute the attribute ++ * @return This builder. ++ * @draft ICU 64 ++ */ ++ LocaleBuilder& addUnicodeLocaleAttribute(StringPiece attribute); ++ ++ /** ++ * Removes a unicode locale attribute, if present, otherwise has no ++ * effect. The attribute must not be empty string and must be well-formed ++ * or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the build() call. ++ * ++ *
Attribute comparison for removal is case-insensitive. ++ * ++ * @param attribute the attribute ++ * @return This builder. ++ * @draft ICU 64 ++ */ ++ LocaleBuilder& removeUnicodeLocaleAttribute(StringPiece attribute); ++ ++ /** ++ * Resets the builder to its initial, empty state. ++ *
This method clears the internal UErrorCode.
++ *
++ * @return this builder
++ * @draft ICU 64
++ */
++ LocaleBuilder& clear();
++
++ /**
++ * Resets the extensions to their initial, empty state.
++ * Language, script, region and variant are unchanged.
++ *
++ * @return this builder
++ * @draft ICU 64
++ */
++ LocaleBuilder& clearExtensions();
++
++ /**
++ * Returns an instance of Locale
created from the fields set
++ * on this builder.
++ * If a setter or the build() call itself needs a memory allocation that
++ * fails, status will be set to U_MEMORY_ALLOCATION_ERROR.
++ * If any of the fields set by the setters are not well-formed, the status
++ * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
++ * not change after the build() call and the caller is free to keep using
++ * the same builder to build more locales.
++ *
++ * @return a new Locale
++ * @draft ICU 64
++ */
++ Locale build(UErrorCode& status);
++
++#ifndef U_HIDE_DRAFT_API
++ /**
++ * Sets the UErrorCode if an error occurred while recording sets.
++ * Preserves older error codes in the outErrorCode.
++ * @param outErrorCode Set to an error code that occurred while setting subtags.
++ * Unchanged if there is no such error or if outErrorCode
++ * already contained an error.
++ * @return TRUE if U_FAILURE(outErrorCode)
++ * @draft ICU 65
++ */
++ UBool copyErrorTo(UErrorCode &outErrorCode) const;
++#endif /* U_HIDE_DRAFT_API */
++
++private:
++ friend class LocaleMatcher::Result;
++
++ void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode);
++
++ UErrorCode status_;
++ char language_[9];
++ char script_[5];
++ char region_[4];
++ CharString *variant_; // Pointer not object so we need not #include internal charstr.h.
++ icu::Locale *extensions_; // Pointer not object. Storage for all other fields.
++
++};
++
++U_NAMESPACE_END
++
++#endif // U_HIDE_DRAFT_API
++
++#endif /* U_SHOW_CPLUSPLUS_API */
++
++#endif // __LOCALEBUILDER_H__
+diff --git a/jdk/src/share/native/common/unicode/localematcher.h b/jdk/src/share/native/common/unicode/localematcher.h
+new file mode 100644
+index 0000000000..701123f750
+--- /dev/null
++++ b/jdk/src/share/native/common/unicode/localematcher.h
+@@ -0,0 +1,605 @@
++// © 2019 and later: Unicode, Inc. and others.
++// License & terms of use: http://www.unicode.org/copyright.html#License
++
++// localematcher.h
++// created: 2019may08 Markus W. Scherer
++
++#ifndef __LOCALEMATCHER_H__
++#define __LOCALEMATCHER_H__
++
++#include "unicode/utypes.h"
++
++#if U_SHOW_CPLUSPLUS_API
++
++#include "unicode/locid.h"
++#include "unicode/stringpiece.h"
++#include "unicode/uobject.h"
++
++/**
++ * \file
++ * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
++ */
++
++#ifndef U_HIDE_DRAFT_API
++
++/**
++ * Builder option for whether the language subtag or the script subtag is most important.
++ *
++ * @see Builder#setFavorSubtag(FavorSubtag)
++ * @draft ICU 65
++ */
++enum ULocMatchFavorSubtag {
++ /**
++ * Language differences are most important, then script differences, then region differences.
++ * (This is the default behavior.)
++ *
++ * @draft ICU 65
++ */
++ ULOCMATCH_FAVOR_LANGUAGE,
++ /**
++ * Makes script differences matter relatively more than language differences.
++ *
++ * @draft ICU 65
++ */
++ ULOCMATCH_FAVOR_SCRIPT
++};
++#ifndef U_IN_DOXYGEN
++typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
++#endif
++
++/**
++ * Builder option for whether all desired locales are treated equally or
++ * earlier ones are preferred.
++ *
++ * @see Builder#setDemotionPerDesiredLocale(Demotion)
++ * @draft ICU 65
++ */
++enum ULocMatchDemotion {
++ /**
++ * All desired locales are treated equally.
++ *
++ * @draft ICU 65
++ */
++ ULOCMATCH_DEMOTION_NONE,
++ /**
++ * Earlier desired locales are preferred.
++ *
++ *
From each desired locale to the next, ++ * the distance to any supported locale is increased by an additional amount ++ * which is at least as large as most region mismatches. ++ * A later desired locale has to have a better match with some supported locale ++ * due to more than merely having the same region subtag. ++ * ++ *
For example: Supported={en, sv} desired=[en-GB, sv]
++ * yields Result(en-GB, en)
because
++ * with the demotion of sv its perfect match is no better than
++ * the region distance between the earlier desired locale en-GB and en=en-US.
++ *
++ *
Notes: ++ *
Example: ++ *
++ * UErrorCode errorCode = U_ZERO_ERROR; ++ * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode); ++ * const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode); // "en" ++ *++ * ++ *
A matcher takes into account when languages are close to one another, ++ * such as Danish and Norwegian, ++ * and when regional variants are close, like en-GB and en-AU as opposed to en-US. ++ * ++ *
If there are multiple supported locales with the same (language, script, region) ++ * likely subtags, then the current implementation returns the first of those locales. ++ * It ignores variant subtags (except for pseudolocale variants) and extensions. ++ * This may change in future versions. ++ * ++ *
For example, the current implementation does not distinguish between ++ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. ++ * ++ *
If you prefer one equivalent locale over another, then provide only the preferred one, ++ * or place it earlier in the list of supported locales. ++ * ++ *
Otherwise, the order of supported locales may have no effect on the best-match results. ++ * The current implementation compares each desired locale with supported locales ++ * in the following order: ++ * 1. Default locale, if supported; ++ * 2. CLDR "paradigm locales" like en-GB and es-419; ++ * 3. other supported locales. ++ * This may change in future versions. ++ * ++ *
Often a product will just need one matcher instance, built with the languages ++ * that it supports. However, it may want multiple instances with different ++ * default languages based on additional information, such as the domain. ++ * ++ *
This class is not intended for public subclassing. ++ * ++ * @draft ICU 65 ++ */ ++class U_COMMON_API LocaleMatcher : public UMemory { ++public: ++ /** ++ * Data for the best-matching pair of a desired and a supported locale. ++ * Movable but not copyable. ++ * ++ * @draft ICU 65 ++ */ ++ class U_COMMON_API Result : public UMemory { ++ public: ++ /** ++ * Move constructor; might modify the source. ++ * This object will have the same contents that the source object had. ++ * ++ * @param src Result to move contents from. ++ * @draft ICU 65 ++ */ ++ Result(Result &&src) U_NOEXCEPT; ++ ++ /** ++ * Destructor. ++ * ++ * @draft ICU 65 ++ */ ++ ~Result(); ++ ++ /** ++ * Move assignment; might modify the source. ++ * This object will have the same contents that the source object had. ++ * ++ * @param src Result to move contents from. ++ * @draft ICU 65 ++ */ ++ Result &operator=(Result &&src) U_NOEXCEPT; ++ ++ /** ++ * Returns the best-matching desired locale. ++ * nullptr if the list of desired locales is empty or if none matched well enough. ++ * ++ * @return the best-matching desired locale, or nullptr. ++ * @draft ICU 65 ++ */ ++ inline const Locale *getDesiredLocale() const { return desiredLocale; } ++ ++ /** ++ * Returns the best-matching supported locale. ++ * If none matched well enough, this is the default locale. ++ * The default locale is nullptr if the list of supported locales is empty and ++ * no explicit default locale is set. ++ * ++ * @return the best-matching supported locale, or nullptr. ++ * @draft ICU 65 ++ */ ++ inline const Locale *getSupportedLocale() const { return supportedLocale; } ++ ++ /** ++ * Returns the index of the best-matching desired locale in the input Iterable order. ++ * -1 if the list of desired locales is empty or if none matched well enough. ++ * ++ * @return the index of the best-matching desired locale, or -1. 
++ * @draft ICU 65 ++ */ ++ inline int32_t getDesiredIndex() const { return desiredIndex; } ++ ++ /** ++ * Returns the index of the best-matching supported locale in the ++ * constructor’s or builder’s input order (“set” Collection plus “added” locales). ++ * If the matcher was built from a locale list string, then the iteration order is that ++ * of a LocalePriorityList built from the same string. ++ * -1 if the list of supported locales is empty or if none matched well enough. ++ * ++ * @return the index of the best-matching supported locale, or -1. ++ * @draft ICU 65 ++ */ ++ inline int32_t getSupportedIndex() const { return supportedIndex; } ++ ++ /** ++ * Takes the best-matching supported locale and adds relevant fields of the ++ * best-matching desired locale, such as the -t- and -u- extensions. ++ * May replace some fields of the supported locale. ++ * The result is the locale that should be used for date and number formatting, collation, etc. ++ * Returns the root locale if getSupportedLocale() returns nullptr. ++ * ++ *
Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
++ *
++ * @return a locale combining the best-matching desired and supported locales.
++ * @draft ICU 65
++ */
++ Locale makeResolvedLocale(UErrorCode &errorCode) const;
++
++ private:
++ Result(const Locale *desired, const Locale *supported,
++ int32_t desIndex, int32_t suppIndex, UBool owned) :
++ desiredLocale(desired), supportedLocale(supported),
++ desiredIndex(desIndex), supportedIndex(suppIndex),
++ desiredIsOwned(owned) {}
++
++ Result(const Result &other) = delete;
++ Result &operator=(const Result &other) = delete;
++
++ const Locale *desiredLocale;
++ const Locale *supportedLocale;
++ int32_t desiredIndex;
++ int32_t supportedIndex;
++ UBool desiredIsOwned;
++
++ friend class LocaleMatcher;
++ };
++
++ /**
++ * LocaleMatcher builder.
++ * Movable but not copyable.
++ *
++ * @see LocaleMatcher#builder()
++ * @draft ICU 65
++ */
++ class U_COMMON_API Builder : public UMemory {
++ public:
++ /**
++ * Constructs a builder used in chaining parameters for building a LocaleMatcher.
++ *
++ * @return a new Builder object
++ * @draft ICU 65
++ */
++ Builder() {}
++
++ /**
++ * Move constructor; might modify the source.
++ * This builder will have the same contents that the source builder had.
++ *
++ * @param src Builder to move contents from.
++ * @draft ICU 65
++ */
++ Builder(Builder &&src) U_NOEXCEPT;
++
++ /**
++ * Destructor.
++ *
++ * @draft ICU 65
++ */
++ ~Builder();
++
++ /**
++ * Move assignment; might modify the source.
++ * This builder will have the same contents that the source builder had.
++ *
++ * @param src Builder to move contents from.
++ * @draft ICU 65
++ */
++ Builder &operator=(Builder &&src) U_NOEXCEPT;
++
++ /**
++ * Parses an Accept-Language string
++ * (RFC 2616 Section 14.4),
++ * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
++ * Allows whitespace in more places but does not allow "*".
++ * Clears any previously set/added supported locales first.
++ *
++ * @param locales the Accept-Language string of locales to set
++ * @return this Builder object
++ * @draft ICU 65
++ */
++ Builder &setSupportedLocalesFromListString(StringPiece locales);
++
++ /**
++ * Copies the supported locales, preserving iteration order.
++ * Clears any previously set/added supported locales first.
++ * Duplicates are allowed, and are not removed.
++ *
++ * @param locales the list of locales
++ * @return this Builder object
++ * @draft ICU 65
++ */
++ Builder &setSupportedLocales(Locale::Iterator &locales);
++
++ /**
++ * Copies the supported locales from the begin/end range, preserving iteration order.
++ * Clears any previously set/added supported locales first.
++ * Duplicates are allowed, and are not removed.
++ *
++ * Each of the iterator parameter values must be an
++ * input iterator whose value is convertible to const Locale &.
++ *
++ * @param begin Start of range.
++ * @param end Exclusive end of range.
++ * @return this Builder object
++ * @draft ICU 65
++ */
++ template This is mostly an implementation detail, and the precise values may change over time.
++ * The implementation may use either the maximized forms or the other ones, or both.
++ * The implementation may or may not rely on the forms to be consistent with each other.
++ *
++ * Callers should construct and use a matcher rather than match pairs of locales directly.
++ *
++ * @param desired Desired locale.
++ * @param supported Supported locale.
++ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
++ * or else the function returns immediately. Check for U_FAILURE()
++ * on output or use with function chaining. (See User Guide for details.)
++ * @return value between 0 and 1, inclusive.
++ * @internal (has a known user)
++ */
++ double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
++#endif // U_HIDE_INTERNAL_API
++
++private:
++ LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
++ LocaleMatcher(const LocaleMatcher &other) = delete;
++ LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
++
++ int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
++
++ const XLikelySubtags &likelySubtags;
++ const LocaleDistance &localeDistance;
++ int32_t thresholdDistance;
++ int32_t demotionPerDesiredLocale;
++ ULocMatchFavorSubtag favorSubtag;
++
++ // These are in input order.
++ const Locale ** supportedLocales;
++ LSR *lsrs;
++ int32_t supportedLocalesLength;
++ // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
++ UHashtable *supportedLsrToIndex; // Map