From 029e038a0a3794e70b35c8cf364274504fa60998 Mon Sep 17 00:00:00 2001 From: Tomas Machalek Date: Mon, 2 Mar 2020 17:08:50 +0100 Subject: [PATCH] Fix compressed interval ungrouping --- export/export.go | 54 ++++++++++++++++++++++--------------------- export/queue/queue.go | 3 ++- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/export/export.go b/export/export.go index b42b24f..78f91a5 100644 --- a/export/export.go +++ b/export/export.go @@ -81,49 +81,54 @@ type RunArgs struct { // ungroupAndPushBack ungroups (if needed) items encoded in a numeric interval specified by "item". // All the resulting groupIDs and *mapping.Mapping instances are then added to the back of the // queue 'q'. -func ungroupAndPushBack(item *mapping.Mapping, currGroup string, groupFilter GroupFilter, q *queue.Deque, attr1 attrib.GoPosAttr, attr2 attrib.GoPosAttr) { - var newGroup string +func ungroupAndPushBack(item *mapping.Mapping, groupFilter GroupFilter, q *queue.Deque, attr1 attrib.GoPosAttr, attr2 attrib.GoPosAttr) { + var newGroup, currGroup string if item.From.First == -1 { - prevGroupStartIdx := item.To.First + currGroupStartIdx := item.To.First for i := item.To.First; i <= item.To.Last; i++ { newGroup = groupFilter.ExtractGroupID(attr2.ID2Str(i)) - if newGroup != "" && newGroup != currGroup { - q.PushBack(newGroup, &mapping.Mapping{ - From: mapping.PosRange{First: -1, Last: -1}, - To: mapping.PosRange{First: prevGroupStartIdx, Last: i - 1}, - }) - prevGroupStartIdx = i + if newGroup != "" { + if currGroup != "" && newGroup != currGroup { + q.PushBack(currGroup, &mapping.Mapping{ + From: mapping.PosRange{First: -1, Last: -1}, + To: mapping.PosRange{First: currGroupStartIdx, Last: i - 1}, + }) + currGroupStartIdx = i + } currGroup = newGroup } } if newGroup != "" { q.PushBack(newGroup, &mapping.Mapping{ From: mapping.PosRange{First: -1, Last: -1}, - To: mapping.PosRange{First: prevGroupStartIdx, Last: item.To.Last}, + To: mapping.PosRange{First: currGroupStartIdx, Last: item.To.Last}, }) } } else if item.To.First == -1 { - prevGroupStartIdx := item.From.First + currGroupStartIdx := item.From.First for i := item.From.First; i <= item.From.Last; i++ { newGroup = groupFilter.ExtractGroupID(attr1.ID2Str(i)) - if newGroup != "" && newGroup != currGroup { - q.PushBack(newGroup, &mapping.Mapping{ - From: mapping.PosRange{First: prevGroupStartIdx, Last: i - 1}, - To: mapping.PosRange{First: -1, Last: -1}, - }) - prevGroupStartIdx = i + if newGroup != "" { + if currGroup != "" && newGroup != currGroup { + q.PushBack(currGroup, &mapping.Mapping{ + From: mapping.PosRange{First: currGroupStartIdx, Last: i - 1}, + To: mapping.PosRange{First: -1, Last: -1}, + }) + currGroupStartIdx = i + } currGroup = newGroup } } if newGroup != "" { q.PushBack(newGroup, &mapping.Mapping{ - From: mapping.PosRange{First: prevGroupStartIdx, Last: item.From.Last}, + From: mapping.PosRange{First: currGroupStartIdx, Last: item.From.Last}, To: mapping.PosRange{First: -1, Last: -1}, }) } } else { + currGroup = groupFilter.ExtractGroupID(attr1.ID2Str(item.From.First)) q.PushBack(currGroup, item) } } @@ -166,12 +171,9 @@ func Run(args RunArgs) { } newGroup1 = getGroupIdent(&item, groupFilter, args.Attr1, args.Attr2) if newGroup1 != "" { - if currGroups.Size() == 0 { - currGroups.PushBack(newGroup1, &item) - fmt.Println(createGroupTag(lang1, lang2, newGroup1)) - fmt.Println(createTag(args.Corp1, args.Attr1, args.Corp2, args.Attr2, &item)) - - } else if newGroup1 == currGroups.FrontGroup() { + //fmt.Println("ITEM: ", newGroup1, item) + //fmt.Println(" deque: ", currGroups) + if newGroup1 == currGroups.FrontGroup() { tmp, err := currGroups.PopFront() if err != nil { log.Fatal("FATAL: ", err) @@ -179,7 +181,7 @@ func Run(args RunArgs) { fmt.Println(createTag(args.Corp1, args.Attr1, args.Corp2, args.Attr2, tmp.Mapping)) currGroups.PushFront(newGroup1, &item) // !! here we assume that item does not contain multiple docs - } else if newGroup1 == currGroups.BackGroup() && currGroups.Size() > 1 { + } else if newGroup1 == currGroups.BackGroup() { last, err := currGroups.PopBack() if err != nil { log.Fatal("FATAL: ", err) @@ -191,7 +193,7 @@ func Run(args RunArgs) { currGroups.PushFront(newGroup1, &item) // !! here we assume that item does not contain multiple docs } else { - ungroupAndPushBack(&item, newGroup1, groupFilter, currGroups, args.Attr1, args.Attr2) + ungroupAndPushBack(&item, groupFilter, currGroups, args.Attr1, args.Attr2) } } } diff --git a/export/queue/queue.go b/export/queue/queue.go index 03ebe55..b0e94d1 100644 --- a/export/queue/queue.go +++ b/export/queue/queue.go @@ -19,6 +19,7 @@ package queue import ( "fmt" + "log" "strings" "github.com/czcorpus/ictools/mapping" @@ -59,7 +60,7 @@ func (q *Deque) Size() int { // The complexity is O(1). func (q *Deque) PushBack(groupID string, mp *mapping.Mapping) { if groupID == "" { - panic("") + log.Fatalf("FATAL: entering empty group for mapping %v", mp) } n := &Element{GroupID: groupID, Mapping: mp} if q.first == nil {