Skip to content

Commit

Permalink
#251 Fix inconsistent indentation and formatting of code
Browse files Browse the repository at this point in the history
- corrects indentation
- removes irrelevant whitespaces or empty lines
- adds Override annotation where applicable / missing
- fixes typos along the path
- simplifies expressions for better readability
  • Loading branch information
mawiesne authored and rzo1 committed Oct 30, 2023
1 parent 600d287 commit 1d59b48
Show file tree
Hide file tree
Showing 364 changed files with 45,701 additions and 48,564 deletions.
693 changes: 351 additions & 342 deletions dkpro-jwpl-api/src/main/java/org/dkpro/jwpl/api/Category.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -19,36 +19,35 @@

import java.util.Iterator;


/**
* An {@link Iterable} over the category objects retrieved by {@link Category#getDescendants()}.
*/
public class CategoryDescendantsIterable implements Iterable<Category> {

  /** Buffer size used when the caller does not specify one. */
  private static final int DEFAULT_BUFFER_SIZE = 25;

  private final Wikipedia wiki;
  private final Category startCategory;

  /**
   * Maximum number of categories the created iterator buffers per refill.
   * The value is handed unchanged to {@link CategoryDescendantsIterator}.
   * A larger buffer means fewer refills at the cost of more memory held at once.
   */
  private final int bufferSize;

  /**
   * Creates an iterable using the default buffer size of 25.
   *
   * @param wiki          The {@link Wikipedia} instance handed to the iterator.
   * @param startCategory The category whose descendants shall be iterated.
   */
  public CategoryDescendantsIterable(Wikipedia wiki, Category startCategory) {
    this(wiki, DEFAULT_BUFFER_SIZE, startCategory);
  }

  /**
   * Creates an iterable with an explicit buffer size.
   *
   * @param wiki          The {@link Wikipedia} instance handed to the iterator.
   * @param bufferSize    The maximum number of categories buffered per refill.
   * @param startCategory The category whose descendants shall be iterated.
   */
  public CategoryDescendantsIterable(Wikipedia wiki, int bufferSize, Category startCategory) {
    this.wiki = wiki;
    this.bufferSize = bufferSize;
    this.startCategory = startCategory;
  }

  /**
   * @return A new, independent {@link CategoryDescendantsIterator} on every call.
   */
  @Override
  public Iterator<Category> iterator() {
    return new CategoryDescendantsIterator(wiki, bufferSize, startCategory);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -28,155 +28,153 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
* An {@link Iterator} over the category objects retrieved by {@link Category#getDescendants()}.
*/
public class CategoryDescendantsIterator implements Iterator<Category> {

private final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

private final Wikipedia wiki;
private final Wikipedia wiki;

private final CategoryBuffer buffer;
private final CategoryBuffer buffer;

/** Contains all category ids that have not been expanded, yet. */
private final Set<Integer> notExpandedCategories;
/**
* Contains all category ids that have not been expanded, yet.
*/
private final Set<Integer> notExpandedCategories;

/** As we do not inspect the whole graph at once now, we need a way to check whether a node was already expanded, to avoid infinite loops. */
private final Set<Integer> expandedCategoryIds;
/**
* As we do not inspect the whole graph at once now, we need a way to check whether a node was already expanded, to avoid infinite loops.
*/
private final Set<Integer> expandedCategoryIds;

public CategoryDescendantsIterator(Wikipedia wiki, int bufferSize, Category startCategory) {
this.wiki = wiki;
buffer = new CategoryBuffer(bufferSize);
notExpandedCategories = new HashSet<>();
// initialize with children of start category
for (Category catItem : startCategory.getChildren()) {
notExpandedCategories.add(catItem.getPageId());
}
public CategoryDescendantsIterator(Wikipedia wiki, int bufferSize, Category startCategory) {
this.wiki = wiki;
buffer = new CategoryBuffer(bufferSize);
notExpandedCategories = new HashSet<>();
// initialize with children of start category
for (Category catItem : startCategory.getChildren()) {
notExpandedCategories.add(catItem.getPageId());
}

expandedCategoryIds = new HashSet<>();
expandedCategoryIds = new HashSet<>();
}

@Override
public boolean hasNext() {
return buffer.hasNext();
}

@Override
public Category next() {
return buffer.next();
}

@Override
public void remove() {
throw new UnsupportedOperationException();
}

/**
* Buffers categories in a list.
*/
class CategoryBuffer {

private final List<Category> buffer;
private final int maxBufferSize; // the number of pages to be buffered after a query to the database.
private int bufferFillSize; // even a 500 slot buffer can be filled with only 5 elements
private int bufferOffset; // the offset in the buffer
private int dataOffset; // the overall offset in the data

public CategoryBuffer(int bufferSize) {
this.maxBufferSize = bufferSize;
this.buffer = new ArrayList<>();
this.bufferFillSize = 0;
this.bufferOffset = 0;
this.dataOffset = 0;

//TODO test whether this works when zero pages are retrieved
// we can test this here using a unit test that retrieves no descendants!
}

public boolean hasNext(){
return buffer.hasNext();
/**
* If there are elements in the buffer left, then return true.
* If the end of the filled buffer is reached, then try to load new buffer.
*
* @return True, if there are pages left. False otherwise.
*/
public boolean hasNext() {
if (bufferOffset < bufferFillSize) {
return true;
} else {
return this.fillBuffer();
}
}

public Category next(){
return buffer.next();
/**
* @return The next Category or null if no more categories are available.
*/
public Category next() {
// if there are still elements in the buffer, just retrieve the next one
if (bufferOffset < bufferFillSize) {
return this.getBufferElement();
}
// if there are no more elements => try to fill a new buffer
else if (this.fillBuffer()) {
return this.getBufferElement();
} else {
// if it cannot be filled => return null
return null;
}
}

public void remove() {
throw new UnsupportedOperationException();
private Category getBufferElement() {
Category cat = buffer.get(bufferOffset);
bufferOffset++;
dataOffset++;
return cat;
}

/**
* Buffers categories in a list.
*
*
*/
class CategoryBuffer{

private final List<Category> buffer;
private final int maxBufferSize; // the number of pages to be buffered after a query to the database.
private int bufferFillSize; // even a 500 slot buffer can be filled with only 5 elements
private int bufferOffset; // the offset in the buffer
private int dataOffset; // the overall offset in the data

public CategoryBuffer(int bufferSize){
this.maxBufferSize = bufferSize;
this.buffer = new ArrayList<>();
this.bufferFillSize = 0;
this.bufferOffset = 0;
this.dataOffset = 0;

//TODO test whether this works when zero pages are retrieved
// we can test this here using a unit test that retrieves no descendants!
}
private boolean fillBuffer() {

/**
* If there are elements in the buffer left, then return true.
* If the end of the filled buffer is reached, then try to load new buffer.
* @return True, if there are pages left. False otherwise.
*/
public boolean hasNext(){
if (bufferOffset < bufferFillSize) {
return true;
}
else {
return this.fillBuffer();
}
}
// clear the old buffer and all variables regarding the state of the buffer
buffer.clear();
bufferOffset = 0;
bufferFillSize = 0;

/**
*
* @return The next Category or null if no more categories are available.
*/
public Category next(){
// if there are still elements in the buffer, just retrieve the next one
if (bufferOffset < bufferFillSize) {
return this.getBufferElement();
}
// if there are no more elements => try to fill a new buffer
else if (this.fillBuffer()) {
return this.getBufferElement();
}
else {
// if it cannot be filled => return null
return null;
}
}
// add not expanded categories to queue
List<Integer> queue = new LinkedList<>(notExpandedCategories);

private Category getBufferElement() {
Category cat = buffer.get(bufferOffset);
bufferOffset++;
dataOffset++;
return cat;
}
// expand until buffer size is reached
while (!queue.isEmpty() && buffer.size() < maxBufferSize) {
// remove first element from queue
Category currentCat = wiki.getCategory(queue.get(0));
queue.remove(0);

// if the node was not previously expanded
if (!expandedCategoryIds.contains(currentCat.getPageId())) {
buffer.add(currentCat);
notExpandedCategories.remove(currentCat.getPageId());
expandedCategoryIds.add(currentCat.getPageId());

logger.debug("buf: " + buffer.size());
logger.debug("notExp: " + notExpandedCategories);
logger.debug("exp: " + expandedCategoryIds);

private boolean fillBuffer() {

// clear the old buffer and all variables regarding the state of the buffer
buffer.clear();
bufferOffset = 0;
bufferFillSize = 0;

List<Integer> queue = new LinkedList<>();

// add not expanded categories to queue
queue.addAll(notExpandedCategories);

// expand until buffer size is reached
while (!queue.isEmpty() && buffer.size() < maxBufferSize) {
// remove first element from queue
Category currentCat = wiki.getCategory(queue.get(0));
queue.remove(0);

// if the node was not previously expanded
if (!expandedCategoryIds.contains(currentCat.getPageId())) {
buffer.add(currentCat);
notExpandedCategories.remove(currentCat.getPageId());
expandedCategoryIds.add(currentCat.getPageId());

logger.debug("buf: " + buffer.size());
logger.debug("notExp: " + notExpandedCategories);
logger.debug("exp: " + expandedCategoryIds);

for (Category child : currentCat.getChildren()) {
queue.add(child.getPageId());
notExpandedCategories.add(child.getPageId());
}
}
}

if (buffer.size() > 0) {
bufferFillSize = buffer.size();
return true;
}
else {
return false;
}
for (Category child : currentCat.getChildren()) {
queue.add(child.getPageId());
notExpandedCategories.add(child.getPageId());
}
}
}

if (buffer.size() > 0) {
bufferFillSize = buffer.size();
return true;
} else {
return false;
}
}
}
}
Loading

0 comments on commit 1d59b48

Please sign in to comment.