Skip to content

Commit

Permalink
Merge pull request #71 from czcorpus/ungroup_fix
Browse files Browse the repository at this point in the history
Fix compressed interval ungrouping
  • Loading branch information
tomachalek authored Mar 2, 2020
2 parents 4db1430 + 029e038 commit 600b0ba
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 27 deletions.
54 changes: 28 additions & 26 deletions export/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,49 +81,54 @@ type RunArgs struct {
// ungroupAndPushBack ungroups (if needed) the items encoded in the numeric interval specified by 'item'.
// Each resulting group ID is then pushed, together with its *mapping.Mapping instance, to the back of
// the queue 'q'.
func ungroupAndPushBack(item *mapping.Mapping, currGroup string, groupFilter GroupFilter, q *queue.Deque, attr1 attrib.GoPosAttr, attr2 attrib.GoPosAttr) {
var newGroup string
func ungroupAndPushBack(item *mapping.Mapping, groupFilter GroupFilter, q *queue.Deque, attr1 attrib.GoPosAttr, attr2 attrib.GoPosAttr) {
var newGroup, currGroup string
if item.From.First == -1 {
prevGroupStartIdx := item.To.First
currGroupStartIdx := item.To.First
for i := item.To.First; i <= item.To.Last; i++ {
newGroup = groupFilter.ExtractGroupID(attr2.ID2Str(i))
if newGroup != "" && newGroup != currGroup {
q.PushBack(newGroup, &mapping.Mapping{
From: mapping.PosRange{First: -1, Last: -1},
To: mapping.PosRange{First: prevGroupStartIdx, Last: i - 1},
})
prevGroupStartIdx = i
if newGroup != "" {
if currGroup != "" && newGroup != currGroup {
q.PushBack(currGroup, &mapping.Mapping{
From: mapping.PosRange{First: -1, Last: -1},
To: mapping.PosRange{First: currGroupStartIdx, Last: i - 1},
})
currGroupStartIdx = i
}
currGroup = newGroup
}
}
if newGroup != "" {
q.PushBack(newGroup, &mapping.Mapping{
From: mapping.PosRange{First: -1, Last: -1},
To: mapping.PosRange{First: prevGroupStartIdx, Last: item.To.Last},
To: mapping.PosRange{First: currGroupStartIdx, Last: item.To.Last},
})
}

} else if item.To.First == -1 {
prevGroupStartIdx := item.From.First
currGroupStartIdx := item.From.First
for i := item.From.First; i <= item.From.Last; i++ {
newGroup = groupFilter.ExtractGroupID(attr1.ID2Str(i))
if newGroup != "" && newGroup != currGroup {
q.PushBack(newGroup, &mapping.Mapping{
From: mapping.PosRange{First: prevGroupStartIdx, Last: i - 1},
To: mapping.PosRange{First: -1, Last: -1},
})
prevGroupStartIdx = i
if newGroup != "" {
if currGroup != "" && newGroup != currGroup {
q.PushBack(currGroup, &mapping.Mapping{
From: mapping.PosRange{First: currGroupStartIdx, Last: i - 1},
To: mapping.PosRange{First: -1, Last: -1},
})
currGroupStartIdx = i
}
currGroup = newGroup
}
}
if newGroup != "" {
q.PushBack(newGroup, &mapping.Mapping{
From: mapping.PosRange{First: prevGroupStartIdx, Last: item.From.Last},
From: mapping.PosRange{First: currGroupStartIdx, Last: item.From.Last},
To: mapping.PosRange{First: -1, Last: -1},
})
}

} else {
currGroup = groupFilter.ExtractGroupID(attr1.ID2Str(item.From.First))
q.PushBack(currGroup, item)
}
}
Expand Down Expand Up @@ -166,20 +171,17 @@ func Run(args RunArgs) {
}
newGroup1 = getGroupIdent(&item, groupFilter, args.Attr1, args.Attr2)
if newGroup1 != "" {
if currGroups.Size() == 0 {
currGroups.PushBack(newGroup1, &item)
fmt.Println(createGroupTag(lang1, lang2, newGroup1))
fmt.Println(createTag(args.Corp1, args.Attr1, args.Corp2, args.Attr2, &item))

} else if newGroup1 == currGroups.FrontGroup() {
//fmt.Println("ITEM: ", newGroup1, item)
//fmt.Println(" deque: ", currGroups)
if newGroup1 == currGroups.FrontGroup() {
tmp, err := currGroups.PopFront()
if err != nil {
log.Fatal("FATAL: ", err)
}
fmt.Println(createTag(args.Corp1, args.Attr1, args.Corp2, args.Attr2, tmp.Mapping))
currGroups.PushFront(newGroup1, &item) // !! here we assume that item does not contain multiple docs

} else if newGroup1 == currGroups.BackGroup() && currGroups.Size() > 1 {
} else if newGroup1 == currGroups.BackGroup() {
last, err := currGroups.PopBack()
if err != nil {
log.Fatal("FATAL: ", err)
Expand All @@ -191,7 +193,7 @@ func Run(args RunArgs) {
currGroups.PushFront(newGroup1, &item) // !! here we assume that item does not contain multiple docs

} else {
ungroupAndPushBack(&item, newGroup1, groupFilter, currGroups, args.Attr1, args.Attr2)
ungroupAndPushBack(&item, groupFilter, currGroups, args.Attr1, args.Attr2)
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion export/queue/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package queue

import (
"fmt"
"log"
"strings"

"github.com/czcorpus/ictools/mapping"
Expand Down Expand Up @@ -59,7 +60,7 @@ func (q *Deque) Size() int {
// The complexity is O(1).
func (q *Deque) PushBack(groupID string, mp *mapping.Mapping) {
if groupID == "" {
panic("")
log.Fatalf("FATAL: entering empty group for mapping %v", mp)
}
n := &Element{GroupID: groupID, Mapping: mp}
if q.first == nil {
Expand Down

0 comments on commit 600b0ba

Please sign in to comment.