diff --git a/src/test/java/org/grobid/core/engines/QuantityParserIntegrationTest.java b/src/test/java/org/grobid/core/engines/QuantityParserIntegrationTest.java
index 27c57fcf..90afe705 100644
--- a/src/test/java/org/grobid/core/engines/QuantityParserIntegrationTest.java
+++ b/src/test/java/org/grobid/core/engines/QuantityParserIntegrationTest.java
@@ -155,8 +155,7 @@ public void testQuantityParser3() throws Exception {
 
 
     @Test
-    public void testQuantityParser4() throws Exception {
-//        String text = "\n\nFirst, it was heated to 840°C to form austenite structure and cooled at the speed of 100°C/hour to harden.";
+    public void testQuantityParserInterfaceExample1() throws Exception {
         String text = "A 20kg ingot is made in a high frequency induction melting furnace and forged to 30mm in thickness and " +
                 "90mm in width at 850 to 1,150°C. Specimens No.2 to 4, 6 and 15 are materials embodying the invention. " +
                 "Others are for comparison. No.1 is a material equivalent to ASTM standard A469-88 class 8 for generator rotor " +
@@ -184,6 +183,103 @@ public void testQuantityParser4() throws Exception {
         assertThat(text.substring(offsetStart1, offsetEnd1), is("100"));
     }
 
+    /** Lab-protocol excerpt: checks the measurement count, the parsed value of "5 x 10-5" and atomic/raw offsets. */
+    @Test
+    public void testQuantityParserInterfaceExample2() throws Exception {
+        String text = "The cells were washed three times with RPMI1640 medium (Nissui Pharmaceutical Co.). " +
+                "The cells (1 x107) were incubated in RPMI-1640 medium containing 10% calf fetal serum (Gibco Co.), " +
+                "50 µg/ml streptomycin, 50 IU/ml of penicillin, 2-mercaptoethanol (5 x 10-5 M), sheep red blood " +
+                "cells (5 x 106 cells) and a test compound dissolved in dimethyl sulfoxide supplied on a microculture " +
+                "plate (NUNC Co., 24 wells) in a carbon dioxide gas incubator (TABAI ESPEC CORP) at 37°C for 5 days.\n" +
+                "\n" +
+                "A solution of 1.18 g (4.00 mmols) of the Compound a obtained in Reference Example 1, 0.39 g (4.13 mmols) " +
+                "of 4-aminopyridine and 20 ml of toluene was heated to reflux for 2 hours. After cooling, the reaction " +
+                "mixture was poured into 1 N sodium hydroxide aqueous solution, and washed twice with chloroform. " +
+                "2 N Hydrochloric acid aqueous solution was added to the aqueous layer and the precipitated white crystals " +
+                "were filtered and dried to give 0.73 g (yield: 53%) of Compound 3.";
+        List measurements = target.process(text);
+
+        assertThat(measurements, hasSize(17));
+        assertThat(measurements.get(0).getQuantityAtomic().getParsedValue().getNumeric().toPlainString(), is("3.0"));
+        assertThat(measurements.get(5).getQuantityAtomic().getRawValue(), is("5 x 10-5"));
+        assertThat(measurements.get(5).getQuantityAtomic().getParsedValue().getNumeric().toPlainString(), is("0.0000500000"));
+
+        // The atomic quantity offsets must point at the bare number inside the input text.
+        int offsetStart0 = measurements.get(4).getQuantityAtomic().getOffsetStart();
+        int offsetEnd0 = measurements.get(4).getQuantityAtomic().getOffsetEnd();
+        assertThat(text.substring(offsetStart0, offsetEnd0), is("50"));
+
+        int offsetStart1 = measurements.get(9).getQuantityAtomic().getOffsetStart();
+        int offsetEnd1 = measurements.get(9).getQuantityAtomic().getOffsetEnd();
+        assertThat(text.substring(offsetStart1, offsetEnd1), is("1.18"));
+
+        // The raw offsets of the same measurement span value and unit ("1.18 g"), not just the value.
+        int offsetStartFull = measurements.get(9).getRawOffsets().start;
+        int offsetEndFull = measurements.get(9).getRawOffsets().end;
+        assertThat(text.substring(offsetStartFull, offsetEndFull), is("1.18 g"));
+
+        assertThat(measurements.get(9).getRawString(), is("1.18 g"));
+    }
+
+    /** Checks a spelled-out number ("Fifty-three" parsed as 53.0) and the offsets of the interval "30 ± 10 years". */
+    @Test
+    public void testQuantityParserInterfaceExample3() throws Exception {
+        String text = "Fifty-three journals were collected: 13 were eliminated from analysis, because they were " +
+                "incomplete, unclear or unreadable. 40 journals were analysed: 19 were journals of subjects of race Z " +
+                "(4 women and 15 men, 30 ± 10 years, 176 ± 7 cm, 70 ± 9 kg, 15 ± 5 % of fat mass, VO 2max : 50 ± 8 ml · " +
+                "kg −1 · min −1 and 21 of race A (6 women and 15 men, 40 ± 7 years, 176 ± 7 cm, 72 ± 10 kg, 18 ± 8 % fat " +
+                "mass, VO 2max : 58 ± 8 ml · kg −1 · min −1 ). Energy, macronutrients (CHO, fat and proteins) and liquid " +
+                "intakes were analysed.";
+        List measurements = target.process(text);
+
+        assertThat(measurements, hasSize(19));
+        assertThat(measurements.get(0).getQuantityAtomic().getRawValue(), is("Fifty-three"));
+        assertThat(measurements.get(0).getParsedValue().getNumeric().toPlainString(), is("53.0"));
+
+        // Measurement 6 is the interval "30 ± 10 years": the base quantity opens the numeric span and
+        // the range quantity closes it, so only the base start and the range end are needed.
+        int intervalStart = measurements.get(6).getQuantityBase().getOffsetStart();
+        int intervalEnd = measurements.get(6).getQuantityRange().getOffsetEnd();
+
+        assertThat(text.substring(intervalStart, intervalEnd), is("30 ± 10"));
+
+        assertThat(measurements.get(6).getRawString(), is("30 ± 10 years"));
+        assertThat(text.substring(measurements.get(6).getRawOffsets().start, measurements.get(6).getRawOffsets().end), is("30 ± 10 years"));
+    }
+
+    /** Text with many concentrations: checks the measurement count and two neighbouring values ("150", "2"). */
+    @Test
+    public void testQuantityParserInterfaceExample4() throws Exception {
+        String text = "COS-7 cells transfected with the indicated plasmids were lysed in Laemmli sample buffer or " +
+                "the lysis buffer mentioned above. E18.5 mouse brains (ICR) were homogenized in 20 mm HEPES (pH 7.4), " +
+                "0.1 mm EDTA, 0.1 mm EGTA, 150 mm NaCl, 2 mm MgCl2, 1 mm Na3VO4, 0.4 mm 4-(2-aminoethyl)benzenesulfonyl " +
+                "fluoride hydrochloride, 10 μg/ml leupeptin, and 1 mm dithiothreitol with a Teflon pestle homogenizer. The " +
+                "lysates or homogenates were centrifuged at 15,000 × g for 20 min, and the supernatants were used for " +
+                "immunoprecipitation of Cdk5 with anti-Cdk5 (C8) or anti-p35 (C19). In some cases, immunoprecipitation was " +
+                "performed with anti-Cdk5 (C8) or anti-p35 (C19) that had been cross-linked to protein A-Sepharose beads " +
+                "using the Pierce Crosslink IP kit according to the protocol of the manufacturer. The cell extracts were " +
+                "incubated with 1.5 μg of antibody and 20 μl of protein A-Sepharose beads and rotated overnight at 4 °C. " +
+                "The beads were washed with washing buffer (25 mm Tris-HCl (pH 7.5), 0.1 mm EDTA, 0.1 mm EGTA, 500 mm NaCl, " +
+                "0.5% Nonidet P-40, and 1 mm dithiothreitol) five times. The kinase activity of Cdk5 was measured with " +
+                "histone H1 as a substrate in kinase buffer (10 mm MOPS (pH 6.8), 1 mm MgCl2, 0.1 mm EDTA, and 0.1 mm " +
+                "EGTA) at 37 °C for 30 min. After SDS-PAGE, phosphorylation was visualized by autoradiography with an " +
+                "imaging plate.";
+        List measurements = target.process(text);
+
+        assertThat(measurements, hasSize(30));
+        assertThat(measurements.get(4).getQuantityAtomic().getRawValue(), is("150"));
+        assertThat(measurements.get(5).getQuantityAtomic().getRawValue(), is("2"));
+
+        // Besides the raw values, the reported offsets must select the same strings in the input text.
+        int offsetStart0 = measurements.get(4).getQuantityAtomic().getOffsetStart();
+        int offsetEnd0 = measurements.get(4).getQuantityAtomic().getOffsetEnd();
+        assertThat(text.substring(offsetStart0, offsetEnd0), is("150"));
+
+        int offsetStart1 = measurements.get(5).getQuantityAtomic().getOffsetStart();
+        int offsetEnd1 = measurements.get(5).getQuantityAtomic().getOffsetEnd();
+        assertThat(text.substring(offsetStart1, offsetEnd1), is("2"));
+    }
+
     @Test
     public void testQuantityParser5() throws Exception {
         String text = "First, it was heated to 840°C to form austenite structure and cooled \n\nat the speed of 100°C/hour to harden.";
@@ -222,44 +318,44 @@ public void testQuantityParser_particularCaseWhereIntervalsAreMerged() 
throws Ex List tokens = QuantityAnalyzer.getInstance().tokenizeWithLayoutToken(text); String result = "Before\tbefore\tB\tBe\tBef\tBefo\te\tre\tore\tfore\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "1920\t1920\t1\t19\t192\t1920\t0\t20\t920\t1920\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdddd\td\t0\t0\tI-\n" + - "s\ts\ts\ts\ts\ts\ts\ts\ts\ts\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "number\tnumber\tn\tnu\tnum\tnumb\tr\ter\tber\tmber\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "stages\tstages\ts\tst\tsta\tstag\ts\tes\tges\tages\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "usually\tusually\tu\tus\tusu\tusua\ty\tly\tlly\tally\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "15\t15\t1\t15\t15\t15\t5\t15\t15\t15\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + - "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "most\tmost\tm\tmo\tmos\tmost\tt\tst\tost\tmost\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "riders\triders\tr\tri\trid\tride\ts\trs\ters\tders\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "enjoyed\tenjoyed\te\ten\tenj\tenjo\td\ted\tyed\toyed\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "least\tleast\tl\tle\tlea\tleas\tt\tst\tast\teast\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - 
"one\tone\to\ton\tone\tone\te\tne\tone\tone\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t1\tI-\n" + - "day\tday\td\tda\tday\tday\ty\tay\tday\tday\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t1\t0\tI-\n" + - "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "rest\trest\tr\tre\tres\trest\tt\tst\test\trest\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "after\tafter\ta\taf\taft\tafte\tr\ter\tter\tfter\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "each\teach\te\tea\teac\teach\th\tch\tach\teach\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "stage\tstage\ts\tst\tsta\tstag\te\tge\tage\ttage\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t"; + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "1920\t1920\t1\t19\t192\t1920\t0\t20\t920\t1920\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdddd\td\t0\t0\tI-\n" + + "s\ts\ts\ts\ts\ts\ts\ts\ts\ts\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "number\tnumber\tn\tnu\tnum\tnumb\tr\ter\tber\tmber\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "stages\tstages\ts\tst\tsta\tstag\ts\tes\tges\tages\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "usually\tusually\tu\tus\tusu\tusua\ty\tly\tlly\tally\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "15\t15\t1\t15\t15\t15\t5\t15\t15\t15\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + + "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "most\tmost\tm\tmo\tmos\tmost\tt\tst\tost\tmost\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + 
"and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "riders\triders\tr\tri\trid\tride\ts\trs\ters\tders\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "enjoyed\tenjoyed\te\ten\tenj\tenjo\td\ted\tyed\toyed\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "least\tleast\tl\tle\tlea\tleas\tt\tst\tast\teast\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "one\tone\to\ton\tone\tone\te\tne\tone\tone\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t1\tI-\n" + + "day\tday\td\tda\tday\tday\ty\tay\tday\tday\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t1\t0\tI-\n" + + "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "rest\trest\tr\tre\tres\trest\tt\tst\test\trest\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "after\tafter\ta\taf\taft\tafte\tr\ter\tter\tfter\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "each\teach\te\tea\teac\teach\th\tch\tach\teach\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "stage\tstage\ts\tst\tsta\tstag\te\tge\tage\ttage\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t"; List measurementList = target.extractMeasurement(tokens, result, target.getSentencesOffsets(tokens)); assertThat(measurementList, hasSize(3)); } - + @Test public void testEntitiesExtractionWithParticularCase() throws Exception { String text = "Through EPD technique, we can increase the coercivity by more than 6.5 kOe with less than 1.2 wt.% Dy."; List tokens = QuantityAnalyzer.getInstance().tokenizeWithLayoutToken(text); - + String result = "Through\tthrough\tT\tTh\tThr\tThro\th\tgh\tugh\tough\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + 
"EPD\tepd\tE\tEP\tEPD\tEPD\tD\tPD\tEPD\tEPD\tALLCAPS\tNODIGIT\t0\tNOPUNCT\tXXX\tX\t0\t0\t\n" + "technique\ttechnique\tt\tte\ttec\ttech\te\tue\tque\tique\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + @@ -305,32 +401,32 @@ public void testReconstructionWithSentenceTokenizer() throws Exception { List tokens = QuantityAnalyzer.getInstance().tokenizeWithLayoutToken(text); String result = "Before\tbefore\tB\tBe\tBef\tBefo\te\tre\tore\tfore\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "1920\t1920\t1\t19\t192\t1920\t0\t20\t920\t1920\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdddd\td\t0\t0\tI-\n" + - "s\ts\ts\ts\ts\ts\ts\ts\ts\ts\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "number\tnumber\tn\tnu\tnum\tnumb\tr\ter\tber\tmber\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "stages\tstages\ts\tst\tsta\tstag\ts\tes\tges\tages\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "usually\tusually\tu\tus\tusu\tusua\ty\tly\tlly\tally\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "15\t15\t1\t15\t15\t15\t5\t15\t15\t15\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + - "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "most\tmost\tm\tmo\tmos\tmost\tt\tst\tost\tmost\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - ".\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "The\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "riders\triders\tr\tri\trid\tride\ts\trs\ters\tders\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - 
"enjoyed\tenjoyed\te\ten\tenj\tenjo\td\ted\tyed\toyed\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "least\tleast\tl\tle\tlea\tleas\tt\tst\tast\teast\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "one\tone\to\ton\tone\tone\te\tne\tone\tone\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t1\tI-\n" + - "day\tday\td\tda\tday\tday\ty\tay\tday\tday\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t1\t0\tI-\n" + - "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - "rest\trest\tr\tre\tres\trest\tt\tst\test\trest\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "after\tafter\ta\taf\taft\tafte\tr\ter\tter\tfter\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "each\teach\te\tea\teac\teach\th\tch\tach\teach\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "stage\tstage\ts\tst\tsta\tstag\te\tge\tage\ttage\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t"; + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "1920\t1920\t1\t19\t192\t1920\t0\t20\t920\t1920\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdddd\td\t0\t0\tI-\n" + + "s\ts\ts\ts\ts\ts\ts\ts\ts\ts\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "number\tnumber\tn\tnu\tnum\tnumb\tr\ter\tber\tmber\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "stages\tstages\ts\tst\tsta\tstag\ts\tes\tges\tages\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "usually\tusually\tu\tus\tusu\tusua\ty\tly\tlly\tally\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + 
"15\t15\t1\t15\t15\t15\t5\t15\t15\t15\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + + "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "most\tmost\tm\tmo\tmos\tmost\tt\tst\tost\tmost\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + ".\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "The\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "riders\triders\tr\tri\trid\tride\ts\trs\ters\tders\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "enjoyed\tenjoyed\te\ten\tenj\tenjo\td\ted\tyed\toyed\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "at\tat\ta\tat\tat\tat\tt\tat\tat\tat\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "least\tleast\tl\tle\tlea\tleas\tt\tst\tast\teast\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "one\tone\to\ton\tone\tone\te\tne\tone\tone\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t1\tI-\n" + + "day\tday\td\tda\tday\tday\ty\tay\tday\tday\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t1\t0\tI-\n" + + "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "rest\trest\tr\tre\tres\trest\tt\tst\test\trest\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "after\tafter\ta\taf\taft\tafte\tr\ter\tter\tfter\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "each\teach\te\tea\teac\teach\th\tch\tach\teach\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "stage\tstage\ts\tst\tsta\tstag\te\tge\tage\ttage\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t"; List sentences = Arrays.asList(new OffsetPosition(0, 61), new OffsetPosition(61, 123)); List measurementList = target.extractMeasurement(tokens, result, sentences); @@ -349,30 +445,30 @@ public void testReconstrictuingListWithMiddleUnit2() throws Exception { List tokens = QuantityAnalyzer.getInstance().tokenizeWithLayoutToken(text); - String result = 
"The\tthe\tT\tTh\tThe\tThe\te\the\tThe\tThe\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxx\tXx\t0\t0\t\n" + - "acidity\tacidity\ta\tac\taci\tacid\ty\tty\tity\tdity\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "pH\tph\tp\tpH\tpH\tpH\tH\tpH\tpH\tpH\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txX\txX\t1\t0\tI-\n" + - "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "3\t3\t3\t3\t3\t3\t3\t3\t3\t3\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "4\t4\t4\t4\t4\t4\t4\t4\t4\t4\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "5\t5\t5\t5\t5\t5\t5\t5\t5\t5\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "6\t6\t6\t6\t6\t6\t6\t6\t6\t6\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "it\tit\ti\tit\tit\tit\tt\tit\tit\tit\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + - "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "correlated\tcorrelated\tc\tco\tcor\tcorr\td\ted\tted\tated\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "with\twith\tw\twi\twit\twith\th\tth\tith\twith\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "interesting\tinteresting\ti\tin\tint\tinte\tg\tng\ting\tting\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "power\tpower\tp\tpo\tpow\tpowe\tr\ter\twer\tower\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t1\t0\t\n" + - "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + - 
"23\t23\t2\t23\t23\t23\t3\t23\t23\t23\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + - "W\tw\tW\tW\tW\tW\tW\tW\tW\tW\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\tI-\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t"; + String result = "The\tthe\tT\tTh\tThe\tThe\te\the\tThe\tThe\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxx\tXx\t0\t0\t\n" + + "acidity\tacidity\ta\tac\taci\tacid\ty\tty\tity\tdity\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "pH\tph\tp\tpH\tpH\tpH\tH\tpH\tpH\tpH\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txX\txX\t1\t0\tI-\n" + + "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "3\t3\t3\t3\t3\t3\t3\t3\t3\t3\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "4\t4\t4\t4\t4\t4\t4\t4\t4\t4\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "5\t5\t5\t5\t5\t5\t5\t5\t5\t5\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "6\t6\t6\t6\t6\t6\t6\t6\t6\t6\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "it\tit\ti\tit\tit\tit\tt\tit\tit\tit\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + + "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "correlated\tcorrelated\tc\tco\tcor\tcorr\td\ted\tted\tated\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "with\twith\tw\twi\twit\twith\th\tth\tith\twith\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + 
"interesting\tinteresting\ti\tin\tint\tinte\tg\tng\ting\tting\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "power\tpower\tp\tpo\tpow\tpowe\tr\ter\twer\tower\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t1\t0\t\n" + + "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t1\t0\t\n" + + "23\t23\t2\t23\t23\t23\t3\t23\t23\t23\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\tI-\n" + + "W\tw\tW\tW\tW\tW\tW\tW\tW\tW\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\tI-\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t"; List measurementList = target.extractMeasurement(tokens, result); @@ -385,121 +481,121 @@ public void testReconstrictuingListWithMiddleUnit2() throws Exception { public void testReconstrictuingListWithMiddleUnit() throws Exception { String text = "Taking T c ¼ 2:30, 1.79, and 1.51 K, we obtain 1.36, 1.01, \n" + - "and 0.47 T for their corresponding pressures. In the \n" + - "Ginzburg-Landau (GL) theory, H c2 ¼ È 0 =2 2 , where \n" + - "is the coherence length and is proportional to \n" + - "ffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffi \n" + - "ð1 þ t 2 Þ=ð1 À t 2 Þ \n" + - "p \n" + - ", È 0 is the flux quantum, and t ¼ T =T c \n" + - "is the reduced temperature. Combining terms gives \n" + - "\n"; + "and 0.47 T for their corresponding pressures. In the \n" + + "Ginzburg-Landau (GL) theory, H c2 ¼ È 0 =2 2 , where \n" + + "is the coherence length and is proportional to \n" + + "ffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffi \n" + + "ð1 þ t 2 Þ=ð1 À t 2 Þ \n" + + "p \n" + + ", È 0 is the flux quantum, and t ¼ T =T c \n" + + "is the reduced temperature. 
Combining terms gives \n" + + "\n"; List tokens = QuantityAnalyzer.getInstance().tokenizeWithLayoutToken(text); String result = "Taking\ttaking\tT\tTa\tTak\tTaki\tg\tng\ting\tking\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + - "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + - "c\tc\tc\tc\tc\tc\tc\tc\tc\tc\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t¼\t¼\t0\t0\t\n" + - "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ":\t:\t:\t:\t:\t:\t:\t:\t:\t:\tALLCAPS\tNODIGIT\t1\tPUNCT\t:\t:\t0\t0\t\n" + - "30\t30\t3\t30\t30\t30\t0\t30\t30\t30\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + - "79\t79\t7\t79\t79\t79\t9\t79\t79\t79\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + - "51\t51\t5\t51\t51\t51\t1\t51\t51\t51\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + - "K\tk\tK\tK\tK\tK\tK\tK\tK\tK\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\tI-\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "we\twe\tw\twe\twe\twe\te\twe\twe\twe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + - "obtain\tobtain\to\tob\tobt\tobta\tn\tin\tain\ttain\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + - 
"36\t36\t3\t36\t36\t36\t6\t36\t36\t36\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + - "01\t01\t0\t01\t01\t01\t1\t01\t01\t01\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + - "47\t47\t4\t47\t47\t47\t7\t47\t47\t47\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + - "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\tI-\n" + - "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "their\ttheir\tt\tth\tthe\tthei\tr\tir\teir\their\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "corresponding\tcorresponding\tc\tco\tcor\tcorr\tg\tng\ting\tding\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "pressures\tpressures\tp\tpr\tpre\tpres\ts\tes\tres\tures\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + - "In\tin\tI\tIn\tIn\tIn\tn\tIn\tIn\tIn\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXx\tXx\t0\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "Ginzburg\tginzburg\tG\tGi\tGin\tGinz\tg\trg\turg\tburg\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + - "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tALLCAPS\tNODIGIT\t1\tHYPHEN\t-\t-\t0\t0\t\n" + - "Landau\tlandau\tL\tLa\tLan\tLand\tu\tau\tdau\tndau\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + - "(\t(\t(\t(\t(\t(\t(\t(\t(\t(\tALLCAPS\tNODIGIT\t1\tOPENBRACKET\t(\t(\t0\t0\t\n" 
+ - "GL\tgl\tG\tGL\tGL\tGL\tL\tGL\tGL\tGL\tALLCAPS\tNODIGIT\t0\tNOPUNCT\tXX\tX\t0\t0\t\n" + - ")\t)\t)\t)\t)\t)\t)\t)\t)\t)\tALLCAPS\tNODIGIT\t1\tENDBRACKET\t)\t)\t0\t0\t\n" + - "theory\ttheory\tt\tth\tthe\ttheo\ty\try\tory\teory\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "H\th\tH\tH\tH\tH\tH\tH\tH\tH\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + - "c\tc\tc\tc\tc\tc\tc\tc\tc\tc\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t¼\t¼\t0\t0\t\n" + - "È\tè\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + - "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "=\t=\t=\t=\t=\t=\t=\t=\t=\t=\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t=\t=\t0\t0\t\n" + - "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + - "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "where\twhere\tw\twh\twhe\twher\te\tre\tere\there\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "coherence\tcoherence\tc\tco\tcoh\tcohe\te\tce\tnce\tence\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "length\tlength\tl\tle\tlen\tleng\th\tth\tgth\tngth\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + - "proportional\tproportional\tp\tpr\tpro\tprop\tl\tal\tnal\tonal\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - 
"to\tto\tt\tto\tto\tto\to\tto\tto\tto\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + - "ffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffi\tffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffi\tf\tff\tffi\tffif\ti\tfi\tffi\tiffi\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "ð1\tð1\tð\tð1\tð1\tð1\t1\tð1\tð1\tð1\tNOCAPS\tCONTAINDIGIT\t0\tNOPUNCT\txd\txd\t0\t0\t\n" + - "þ\tþ\tþ\tþ\tþ\tþ\tþ\tþ\tþ\tþ\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t0\t0\t\n" + - "t\tt\tt\tt\tt\tt\tt\tt\tt\tt\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "Þ\tþ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + - "=\t=\t=\t=\t=\t=\t=\t=\t=\t=\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t=\t=\t0\t0\t\n" + - "ð1\tð1\tð\tð1\tð1\tð1\t1\tð1\tð1\tð1\tNOCAPS\tCONTAINDIGIT\t0\tNOPUNCT\txd\txd\t0\t0\t\n" + - "À\tà\tÀ\tÀ\tÀ\tÀ\tÀ\tÀ\tÀ\tÀ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + - "t\tt\tt\tt\tt\tt\tt\tt\tt\tt\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "Þ\tþ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + - "p\tp\tp\tp\tp\tp\tp\tp\tp\tp\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "È\tè\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + - "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + - "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "flux\tflux\tf\tfl\tflu\tflux\tx\tux\tlux\tflux\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "quantum\tquantum\tq\tqu\tqua\tquan\tm\tum\ttum\tntum\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" 
+ - ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + - "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "t\tt\tt\tt\tt\tt\tt\tt\tt\tt\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t¼\t¼\t0\t0\t\n" + - "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + - "=\t=\t=\t=\t=\t=\t=\t=\t=\t=\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t=\t=\t0\t0\t\n" + - "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + - "c\tc\tc\tc\tc\tc\tc\tc\tc\tc\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + - "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + - "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + - "reduced\treduced\tr\tre\tred\tredu\td\ted\tced\tuced\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "temperature\ttemperature\tt\tte\ttem\ttemp\te\tre\ture\tture\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + - "Combining\tcombining\tC\tCo\tCom\tComb\tg\tng\ting\tning\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + - "terms\tterms\tt\tte\tter\tterm\ts\tms\trms\terms\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + - "gives\tgives\tg\tgi\tgiv\tgive\ts\tes\tves\tives\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t"; + "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + + "c\tc\tc\tc\tc\tc\tc\tc\tc\tc\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + "¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t¼\t¼\t0\t0\t\n" + + "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + ":\t:\t:\t:\t:\t:\t:\t:\t:\t:\tALLCAPS\tNODIGIT\t1\tPUNCT\t:\t:\t0\t0\t\n" + + "30\t30\t3\t30\t30\t30\t0\t30\t30\t30\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + + 
",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + + "79\t79\t7\t79\t79\t79\t9\t79\t79\t79\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + + "51\t51\t5\t51\t51\t51\t1\t51\t51\t51\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + + "K\tk\tK\tK\tK\tK\tK\tK\tK\tK\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\tI-\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "we\twe\tw\twe\twe\twe\te\twe\twe\twe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + + "obtain\tobtain\to\tob\tobt\tobta\tn\tin\tain\ttain\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + + "36\t36\t3\t36\t36\t36\t6\t36\t36\t36\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + + "01\t01\t0\t01\t01\t01\t1\t01\t01\t01\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + 
".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + + "47\t47\t4\t47\t47\t47\t7\t47\t47\t47\tNOCAPS\tALLDIGIT\t0\tNOPUNCT\tdd\td\t0\t0\t\n" + + "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\tI-\n" + + "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "their\ttheir\tt\tth\tthe\tthei\tr\tir\teir\their\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "corresponding\tcorresponding\tc\tco\tcor\tcorr\tg\tng\ting\tding\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "pressures\tpressures\tp\tpr\tpre\tpres\ts\tes\tres\tures\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + + "In\tin\tI\tIn\tIn\tIn\tn\tIn\tIn\tIn\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXx\tXx\t0\t0\t\n" + + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "Ginzburg\tginzburg\tG\tGi\tGin\tGinz\tg\trg\turg\tburg\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + + "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tALLCAPS\tNODIGIT\t1\tHYPHEN\t-\t-\t0\t0\t\n" + + "Landau\tlandau\tL\tLa\tLan\tLand\tu\tau\tdau\tndau\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + + "(\t(\t(\t(\t(\t(\t(\t(\t(\t(\tALLCAPS\tNODIGIT\t1\tOPENBRACKET\t(\t(\t0\t0\t\n" + + "GL\tgl\tG\tGL\tGL\tGL\tL\tGL\tGL\tGL\tALLCAPS\tNODIGIT\t0\tNOPUNCT\tXX\tX\t0\t0\t\n" + + ")\t)\t)\t)\t)\t)\t)\t)\t)\t)\tALLCAPS\tNODIGIT\t1\tENDBRACKET\t)\t)\t0\t0\t\n" + + "theory\ttheory\tt\tth\tthe\ttheo\ty\try\tory\teory\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "H\th\tH\tH\tH\tH\tH\tH\tH\tH\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + + "c\tc\tc\tc\tc\tc\tc\tc\tc\tc\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + 
"¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t¼\t¼\t0\t0\t\n" + + "È\tè\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + + "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "=\t=\t=\t=\t=\t=\t=\t=\t=\t=\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t=\t=\t0\t0\t\n" + + "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\tI-\n" + + "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "where\twhere\tw\twh\twhe\twher\te\tre\tere\there\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "coherence\tcoherence\tc\tco\tcoh\tcohe\te\tce\tnce\tence\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "length\tlength\tl\tle\tlen\tleng\th\tth\tgth\tngth\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + + "proportional\tproportional\tp\tpr\tpro\tprop\tl\tal\tnal\tonal\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "to\tto\tt\tto\tto\tto\to\tto\tto\tto\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + + "ffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffi\tffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffiffi\tf\tff\tffi\tffif\ti\tfi\tffi\tiffi\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "ð1\tð1\tð\tð1\tð1\tð1\t1\tð1\tð1\tð1\tNOCAPS\tCONTAINDIGIT\t0\tNOPUNCT\txd\txd\t0\t0\t\n" + + "þ\tþ\tþ\tþ\tþ\tþ\tþ\tþ\tþ\tþ\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t0\t0\t\n" + + "t\tt\tt\tt\tt\tt\tt\tt\tt\tt\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + 
+ "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "Þ\tþ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + + "=\t=\t=\t=\t=\t=\t=\t=\t=\t=\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t=\t=\t0\t0\t\n" + + "ð1\tð1\tð\tð1\tð1\tð1\t1\tð1\tð1\tð1\tNOCAPS\tCONTAINDIGIT\t0\tNOPUNCT\txd\txd\t0\t0\t\n" + + "À\tà\tÀ\tÀ\tÀ\tÀ\tÀ\tÀ\tÀ\tÀ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + + "t\tt\tt\tt\tt\tt\tt\tt\tt\tt\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + "2\t2\t2\t2\t2\t2\t2\t2\t2\t2\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "Þ\tþ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tÞ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + + "p\tp\tp\tp\tp\tp\tp\tp\tp\tp\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "È\tè\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tÈ\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t0\t0\t\n" + + "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNOCAPS\tALLDIGIT\t1\tNOPUNCT\td\td\t0\t0\t\n" + + "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "flux\tflux\tf\tfl\tflu\tflux\tx\tux\tlux\tflux\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "quantum\tquantum\tq\tqu\tqua\tquan\tm\tum\ttum\tntum\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tALLCAPS\tNODIGIT\t1\tCOMMA\t,\t,\t0\t0\t\n" + + "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "t\tt\tt\tt\tt\tt\tt\tt\tt\tt\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + "¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\t¼\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t¼\t¼\t0\t0\t\n" + + "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + + "=\t=\t=\t=\t=\t=\t=\t=\t=\t=\tALLCAPS\tNODIGIT\t1\tNOPUNCT\t=\t=\t0\t0\t\n" + + "T\tt\tT\tT\tT\tT\tT\tT\tT\tT\tALLCAPS\tNODIGIT\t1\tNOPUNCT\tX\tX\t1\t0\t\n" + + 
"c\tc\tc\tc\tc\tc\tc\tc\tc\tc\tNOCAPS\tNODIGIT\t1\tNOPUNCT\tx\tx\t1\t0\t\n" + + "is\tis\ti\tis\tis\tis\ts\tis\tis\tis\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txx\tx\t0\t0\t\n" + + "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxx\tx\t0\t0\t\n" + + "reduced\treduced\tr\tre\tred\tredu\td\ted\tced\tuced\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "temperature\ttemperature\tt\tte\ttem\ttemp\te\tre\ture\tture\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t1\tDOT\t.\t.\t0\t0\t\n" + + "Combining\tcombining\tC\tCo\tCom\tComb\tg\tng\ting\tning\tINITCAP\tNODIGIT\t0\tNOPUNCT\tXxxx\tXx\t0\t0\t\n" + + "terms\tterms\tt\tte\tter\tterm\ts\tms\trms\terms\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t\n" + + "gives\tgives\tg\tgi\tgiv\tgive\ts\tes\tves\tives\tNOCAPS\tNODIGIT\t0\tNOPUNCT\txxxx\tx\t0\t0\t"; List sentences = new ArrayList<>(); sentences.add(new OffsetPosition(0, 105)); @@ -521,5 +617,4 @@ public void testReconstrictuingListWithMiddleUnit() throws Exception { } - } \ No newline at end of file