diff --git a/.gitignore b/.gitignore index 2f295e5f..bbfcc8c8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ eval* .pytest_cache .tox docs_api +.idea +venv +dump.txt.gz diff --git a/.travis.yml b/.travis.yml index 705f3ee6..b08023f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,8 @@ language: python python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" + - "3.7" + - "3.8" install: - pip install tox-travis diff --git a/dump.txt.gz b/dump.txt.gz deleted file mode 100644 index b3b4af48..00000000 Binary files a/dump.txt.gz and /dev/null differ diff --git a/examples/advanced.py b/examples/advanced.py index 25d17076..08776949 100644 --- a/examples/advanced.py +++ b/examples/advanced.py @@ -1,5 +1,3 @@ -from __future__ import print_function - from koala.ExcelCompiler import ExcelCompiler from koala.Spreadsheet import Spreadsheet from koala.excellib import xsum diff --git a/examples/basic.py b/examples/basic.py index ddeb323c..280934bd 100644 --- a/examples/basic.py +++ b/examples/basic.py @@ -1,5 +1,3 @@ -from __future__ import print_function - from koala.ExcelCompiler import ExcelCompiler from koala.Spreadsheet import Spreadsheet diff --git a/koala/Cell.py b/koala/Cell.py index df893ccd..3dd97d9c 100644 --- a/koala/Cell.py +++ b/koala/Cell.py @@ -1,13 +1,9 @@ # cython: profile=True -from __future__ import absolute_import, division - from koala.CellBase import CellBase from koala.Range import RangeCore from koala.utils import * -from openpyxl.compat import unicode - class Cell(CellBase): ctr = 0 @@ -62,11 +58,7 @@ def __init__( self.__row = None self.__col_idx = None - # `unicode` != `str` in Python2. 
See `from openpyxl.compat import unicode` - if type(formula) == str and str != unicode: - self.__formula = unicode(formula, 'utf-8') if formula else None - else: - self.__formula = formula if formula else None + self.__formula = formula if formula else None self.__value = value self.python_expression = None diff --git a/koala/ExcelCompiler.py b/koala/ExcelCompiler.py index 8d228a34..a5eab3b8 100644 --- a/koala/ExcelCompiler.py +++ b/koala/ExcelCompiler.py @@ -1,15 +1,5 @@ -from __future__ import print_function # cython: profile=True -import os.path - -import networkx - -from koala.reader import read_archive, read_named_ranges, read_cells -from koala.utils import * -from koala.ast import graph_from_seeds, shunting_yard, build_ast, prepare_pointer -from koala.Cell import Cell -from koala.Range import RangeFactory from koala.Spreadsheet import Spreadsheet import warnings @@ -20,8 +10,6 @@ class ExcelCompiler(object): """ def __init__(self, file, ignore_sheets = [], ignore_hidden = False, debug = False): - # print("___### Initializing Excel Compiler ###___") - warnings.warn( "The ExcelCompiler class will disappear in a future version. Please use Spreadsheet instead.", PendingDeprecationWarning diff --git a/koala/Range.py b/koala/Range.py index f3c2c0cd..a4809caa 100644 --- a/koala/Range.py +++ b/koala/Range.py @@ -1,12 +1,8 @@ -from __future__ import absolute_import, division, print_function - +import logging from koala.CellBase import CellBase from koala.ExcelError import ErrorCodes, ExcelError from koala.utils import * -from openpyxl.compat import unicode - - # WARNING: Range should never be imported directly. Import Range from excelutils instead. 
### Range Utils ### @@ -58,7 +54,7 @@ def check_value(a): return ExcelError(a) try: # This is to avoid None or Exception returned by Range operations - if isinstance(a, (unicode, str)): + if isinstance(a, str): return a elif float(a): return a @@ -122,7 +118,6 @@ def __build( cells, nrows, ncols = resolve_range( reference, should_flatten=True) except Exception as e: - print('Pb with ref', reference, e) return ValueError('Range ERROR') # Will still be considered as a Range object, since we are inside __init__... origin = parse_cell_address(cells[0]) if len(cells) > 0 else None # origin of Range @@ -170,7 +165,7 @@ def __build( elif not self.is_pointer: # when building pointers, name shouldn't be updated, but in that case reference is not a dict self.__name = reference else: - print('Pb with Name', reference, name) + logging.debug('Pb with Name %s %s', reference, name) self.__origin = origin self.__addresses = cells self.__order = order @@ -570,7 +565,7 @@ def multiply(a, b): @staticmethod def divide(a, b): try: - return old_div(float(check_value(a)), float(check_value(b))) + return float(check_value(a)) / float(check_value(b)) except Exception as e: return ExcelError('#DIV/0!', e) @@ -584,9 +579,9 @@ def power(a, b): @staticmethod def is_equal(a, b): try: - if not isinstance(a, (str, unicode)): + if not isinstance(a, str): a = check_value(a) - if not isinstance(b, (str, unicode)): + if not isinstance(b, str): b = check_value(b) return is_almost_equal(a, b, precision=0.00001) @@ -596,9 +591,9 @@ def is_equal(a, b): @staticmethod def is_not_equal(a, b): try: - if not isinstance(a, (str, unicode)): + if not isinstance(a, str): a = check_value(a) - if not isinstance(a, (str, unicode)): + if not isinstance(b, str): b = check_value(b) return a != b diff --git a/koala/Spreadsheet.py b/koala/Spreadsheet.py index f4ac5db5..d5ab35e4 100644 --- a/koala/Spreadsheet.py +++ b/koala/Spreadsheet.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, print_function # cython: 
profile=True from koala.Range import get_cell_address, parse_cell_address @@ -13,17 +12,13 @@ from koala.utils import * import warnings +import logging import os.path import networkx from networkx.readwrite import json_graph -from openpyxl.compat import unicode - - class Spreadsheet(object): - def __init__(self, file=None, ignore_sheets=[], ignore_hidden=False, debug=False): - # print("___### Initializing Excel Compiler ###___") - + def __init__(self, file=None, ignore_sheets=[], ignore_hidden=False, include_only_sheets=None, debug=False): if file is None: # create empty version of this object self.cells = None # precursor for cellmap: dict that link addresses (str) to Cell objects. @@ -56,7 +51,7 @@ def __init__(self, file=None, ignore_sheets=[], ignore_hidden=False, debug=False else: # assume file path archive = read_archive(os.path.abspath(file)) # Parse cells - self.cells = read_cells(archive, ignore_sheets, ignore_hidden) + self.cells, self.sheets = read_cells(archive, ignore_sheets, ignore_hidden, include_only_sheets) # Parse named_range { name (ExampleName) -> address (Sheet!A1:A10)} self.named_ranges = read_named_ranges(archive) self.range = RangeFactory(self.cells) @@ -83,8 +78,6 @@ def gen_graph(self, outputs=[], inputs=[]): :param outputs: can be used to specify the outputs. All not affected cells are removed from the graph. :param inputs: can be used to specify the inputs. All not affected cells are removed from the graph. 
""" - # print('___### Generating Graph ###___') - if len(outputs) == 0: preseeds = set(list(flatten(self.cells.keys())) + list(self.named_ranges.keys())) # to have unicity else: @@ -128,7 +121,6 @@ def gen_graph(self, outputs=[], inputs=[]): seeds.append(self.cells[o]) seeds = set(seeds) - # print("Seeds %s cells" % len(seeds)) outputs = set(preseeds) if len(outputs) > 0 else [] # seeds and outputs are the same when you don't specify outputs cellmap, G = graph_from_seeds(seeds, self) @@ -169,12 +161,6 @@ def gen_graph(self, outputs=[], inputs=[]): inputs = set(inputs) - - # print("Graph construction done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) - - # undirected = networkx.Graph(G) - # print "Number of connected components %s", str(number_connected_components(undirected)) - if inputs == [] and outputs == []: self.build_spreadsheet(G, cellmap, self.named_ranges, pointers = self.pointers, outputs = outputs, inputs = inputs, debug = self.debug) else: @@ -277,7 +263,7 @@ def cell_add(self, address=None, cell=None, value=None, formula=None): self.cellmap = cellmap self.G = G - print("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) + logging.debug("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) def set_formula(self, addr, formula): # previously set_formula was used. Capture this behaviour. 
@@ -306,7 +292,7 @@ def cell_set_formula(self, address, formula): for index, c in enumerate(cell.range.cells): # for each cell of the range, translate the formula if index == 0: c.formula = formula - translator = Translator(unicode('=' + formula), c.address().split('!')[1]) # the Translator needs a reference without sheet + translator = Translator('=' + formula, c.address().split('!')[1]) # the Translator needs a reference without sheet else: translated = translator.translate_formula(c.address().split('!')[1]) # the Translator needs a reference without sheet c.formula = translated[1:] # to get rid of the '=' @@ -325,11 +311,11 @@ def cell_set_formula(self, address, formula): self.evaluate(address) self.cellmap[address].should_eval = should_eval - print("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) + logging.debug("Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) def prune_graph(self, *args): - print('___### Pruning Graph ###___') + logging.debug('___### Pruning Graph ###___') G = self.G @@ -338,7 +324,6 @@ def prune_graph(self, *args): for input_address in self.inputs: child = self.cellmap[input_address] if child == None: - print("Not found ", input_address) continue g = make_subgraph(G, child, "descending") dependencies = dependencies.union(g.nodes()) @@ -393,7 +378,6 @@ def prune_graph(self, *args): subgraph.add_edge(const_node, current) - print("Graph pruning done, %s nodes, %s edges, %s cellmap entries" % (len(subgraph.nodes()),len(subgraph.edges()),len(new_cellmap))) undirected = networkx.Graph(subgraph) # print "Number of connected components %s", str(number_connected_components(undirected)) # print map(lambda x: x.address(), subgraph.nodes()) @@ -430,7 +414,7 @@ def prune_graph(self, *args): return spreadsheet.build_spreadsheet(subgraph, new_cellmap, self.named_ranges, self.pointers, self.outputs, self.inputs, debug = 
self.debug) def clean_pointer(self): - print('___### Cleaning Pointers ###___') + logging.debug('___### Cleaning Pointers ###___') new_named_ranges = self.named_ranges.copy() new_cells = self.cellmap.copy() @@ -484,7 +468,7 @@ def clean_pointer(self): for repl in replacements: if type(repl["value"]) == ExcelError: if self.debug: - print('WARNING: Excel error found => replacing with #N/A') + logging.debug('WARNING: Excel error found => replacing with #N/A') repl["value"] = "#N/A" if repl["expression_type"] == "value": @@ -503,7 +487,7 @@ def clean_pointer(self): return new_cells, new_named_ranges def print_value_ast(self, ast,node,indent): - print("%s %s %s %s" % (" "*indent, str(node.token.tvalue), str(node.token.ttype), str(node.token.tsubtype))) + logging.debug("%s %s %s %s" % (" "*indent, str(node.token.tvalue), str(node.token.ttype), str(node.token.tsubtype))) for c in node.children(ast): self.print_value_ast(ast, c, indent+1) @@ -525,7 +509,7 @@ def eval_pointers_from_ast(self, ast, node, cell): except Exception as e: if self.debug: - print('EXCEPTION raised in eval_pointers: EXPR', expression, cell["address"]) + logging.debug('EXCEPTION raised in eval_pointers: EXPR %s %s', expression, cell["address"]) raise Exception("Problem evalling: %s for %s, %s" % (e, cell["address"], expression)) return {"formula":pointer_string, "value": pointer_value, "expression_type": expression_type} @@ -701,7 +685,7 @@ def cell_set_value(self, address, value): cell = self.cellmap[address] # when you set a value on cell, its should_eval flag is set to 'never' so its formula is not used until set free again => sp.activate_formula() - self.fix_cell(address) + self.cell_fix(address) # case where the address refers to a range if cell.is_range: @@ -854,7 +838,7 @@ def cell_free(self, address=None): def print_value_tree(self,addr,indent): cell = self.cellmap[addr] - print("%s %s = %s" % (" "*indent,addr,cell.value)) + logging.debug("%s %s = %s" % (" "*indent,addr,cell.value)) for c in 
self.G.predecessors_iter(cell): self.print_value_tree(c.address(), indent+1) @@ -892,7 +876,7 @@ def eval_ref(self, addr1, addr2 = None, ref = None): cell1 = self.cellmap[addr1] else: if self.debug: - print('WARNING in eval_ref: address %s not found in cellmap, returning #NULL' % addr1) + logging.warning('WARNING in eval_ref: address %s not found in cellmap, returning #NULL' % addr1) return ExcelError('#NULL', 'Cell %s is empty' % addr1) if addr2 == None: if cell1.is_range: @@ -976,7 +960,7 @@ def cell_evaluate(self, address): cell = self.cellmap[address] except: if self.debug: - print('WARNING: Empty cell at ' + address) + logging.warning('WARNING: Empty cell at ' + address) return ExcelError('#NULL', 'Cell %s is empty' % address) # no formula, fixed value diff --git a/koala/__init__.py b/koala/__init__.py index 012c30aa..322478f4 100644 --- a/koala/__init__.py +++ b/koala/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from openpyxl import * from .ast import * from .Cell import * diff --git a/koala/ast/__init__.py b/koala/ast/__init__.py index 6d1f8d21..03efb107 100644 --- a/koala/ast/__init__.py +++ b/koala/ast/__init__.py @@ -1,12 +1,9 @@ -from __future__ import absolute_import # cython: profile=True import collections -import six import networkx from networkx.classes.digraph import DiGraph -from openpyxl.compat import unicode from koala.utils import uniqueify, flatten, max_dimension, col2num, resolve_range from koala.Cell import Cell @@ -118,7 +115,7 @@ def shunting_yard(expression, named_ranges, ref = None, tokenize_range = False): for index, token in enumerate(tokens): new_tokens.append(token) - if type(token.tvalue) == str or type(token.tvalue) == unicode: + if type(token.tvalue) == str: if token.tvalue.startswith(':'): # example -> :OFFSET( or simply :A10 depth = 0 @@ -139,7 +136,7 @@ def shunting_yard(expression, named_ranges, ref = None, tokenize_range = False): if depth == 0: new_tokens.pop() # these 2 lines are needed to remove 
INDEX() new_tokens.pop() - expr = six.next(rev).tvalue + expr + expr = next(rev).tvalue + expr break expr += token.tvalue @@ -377,13 +374,11 @@ def cell2code(cell, named_ranges): else: ast = None - if isinstance(cell.value, unicode): - code = u'u"' + cell.value.replace(u'"', u'\\"') + u'"' - elif isinstance(cell.value, str): - raise RuntimeError("Got unexpected non-unicode str") + if isinstance(cell.value, str): + code = '"' + cell.value.replace('"', r'\"') + '"' else: code = str(cell.value) - return code,ast + return code, ast def prepare_pointer(code, names, ref_cell = None): @@ -455,7 +450,8 @@ def graph_from_seeds(seeds, cell_source): # directed graph G = networkx.DiGraph() # match the info in cellmap - for c in cellmap.values(): G.add_node(c) + for c in cellmap.values(): + G.add_node(c) # cells to analyze: only formulas todo = [s for s in seeds if s.formula] diff --git a/koala/ast/astnodes.py b/koala/ast/astnodes.py index 03bceb44..4d10d3d3 100644 --- a/koala/ast/astnodes.py +++ b/koala/ast/astnodes.py @@ -1,26 +1,23 @@ -from __future__ import print_function # cython: profile=True +import logging from networkx import NetworkXError -from openpyxl.compat import unicode - from koala.excellib import FUNCTION_MAP, IND_FUN from koala.utils import is_range, split_range, split_address, resolve_range from koala.ExcelError import * def to_str(my_string): - # `unicode` != `str` in Python2. 
See `from openpyxl.compat import unicode` - if type(my_string) == str and str != unicode: - return unicode(my_string, 'utf-8') - elif type(my_string) == unicode: + if isinstance(my_string, bytes): + return my_string.decode("utf-8") + elif isinstance(my_string, str): return my_string else: try: return str(my_string) - except: - print('Couldnt parse as string', type(my_string)) + except Exception: + logging.warning('Couldnt parse as string %s', type(my_string)) return my_string # elif isinstance(my_string, (int, float, tuple, Ra): # return str(my_string) @@ -45,7 +42,7 @@ def __getattr__(self, name): def children(self, ast): try: args = ast.predecessors(self) - args = sorted(args, key=lambda x: ast.node[x]['pos']) + args = sorted(args, key=lambda x: ast.nodes[x]['pos']) except NetworkXError: args = '' return args @@ -204,7 +201,7 @@ def get_cells(self): def emit(self,ast,context=None, pointer = False): if isinstance(self.tvalue, ExcelError): if self.debug: - print('WARNING: Excel Error Code found', self.tvalue) + logging.debug('WARNING: Excel Error Code found %s', self.tvalue) return self.tvalue is_a_range = False @@ -228,7 +225,7 @@ def emit(self,ast,context=None, pointer = False): sh,col,row = split_address(rng) except: if self.debug: - print('WARNING: Unknown address: %s is not a cell/range reference, nor a named range' % to_str(rng)) + logging.warning('WARNING: Unknown address: %s is not a cell/range reference, nor a named range' % to_str(rng)) sh = None if sh: diff --git a/koala/excellib.py b/koala/excellib.py index 46b85008..01172e5a 100644 --- a/koala/excellib.py +++ b/koala/excellib.py @@ -6,10 +6,9 @@ # source: https://github.com/dgorissen/pycel/blob/master/src/pycel/excellib.py -from __future__ import absolute_import, division - import itertools import numpy as np +import numpy_financial as npf import scipy.optimize import datetime import random @@ -18,8 +17,6 @@ from calendar import monthrange from dateutil.relativedelta import relativedelta -from 
openpyxl.compat import unicode - from koala.utils import * from koala.Range import RangeCore as Range from koala.ExcelError import * @@ -427,7 +424,7 @@ def irr(values, guess = None): raise ValueError('guess value for excellib.irr() is %s and not 0' % guess) else: try: - return np.irr(values) + return npf.irr(values) except Exception as e: return ExcelError('#NUM!', e) @@ -729,7 +726,7 @@ def randbetween(bottom, top): def right(text,n): #TODO: hack to deal with naca section numbers - if isinstance(text, unicode) or isinstance(text,str): + if isinstance(text, str): return text[-n:] else: # TODO: get rid of the decimal diff --git a/koala/reader.py b/koala/reader.py index 8a679204..4a4a9076 100644 --- a/koala/reader.py +++ b/koala/reader.py @@ -1,6 +1,5 @@ -from __future__ import print_function - from io import BytesIO +import logging import re import os import json @@ -101,13 +100,13 @@ def read_named_ranges(archive): return dict -def read_cells(archive, ignore_sheets = [], ignore_hidden = False): +def read_cells(archive, ignore_sheets = [], ignore_hidden = False, include_only_sheets=None): global debug - # print('___### Reading Cells from XLSX ###___') - cells = {} + sheets = [] + functions = set() cts = dict(read_content_types(archive)) @@ -120,12 +119,20 @@ def read_cells(archive, ignore_sheets = [], ignore_hidden = False): else: shared_strings = [] + ignore_sheets = frozenset(ignore_sheets) + + if include_only_sheets is not None: + include_only_sheets = frozenset(include_only_sheets) + for sheet in detect_worksheets(archive): sheet_name = sheet['title'] function_map = {} - if sheet_name in ignore_sheets: continue + if sheet_name in ignore_sheets or (include_only_sheets is not None and sheet_name not in include_only_sheets): + continue + + sheets.append(sheet_name) root = fromstring(archive.read(sheet['path'])) # it is necessary to use cElementTree from xml module, otherwise root.findall doesn't work as it should @@ -159,21 +166,21 @@ def read_cells(archive, 
ignore_sheets = [], ignore_hidden = False): if not skip: cell = {'a': '%s!%s' % (sheet_name, cell_address), 'f': None, 'v': None} if debug: - print('Cell', cell['a']) + logging.debug('Cell %s', cell['a']) for child in c: child_data_type = child.get('t', 'n') # if no type assigned, assign 'number' if child.tag == '{%s}f' % SHEET_MAIN_NS : if 'ref' in child.attrib: # the first cell of a shared formula has a 'ref' attribute if debug: - print('*** Found definition of shared formula ***', child.text, child.attrib['ref']) + logging.debug('*** Found definition of shared formula *** %s %s', child.text, child.attrib['ref']) if "si" in child.attrib: function_map[child.attrib['si']] = (child.attrib['ref'], Translator(str('=' + child.text), cell_address)) # translator of openpyxl needs a unicode argument that starts with '=' # else: # print "Encountered cell with ref but not si: ", sheet_name, child.attrib['ref'] if child_data_type == 'shared': if debug: - print('*** Found child %s of shared formula %s ***' % (cell_address, child.attrib['si'])) + logging.debug('*** Found child %s of shared formula %s ***' % (cell_address, child.attrib['si'])) ref = function_map[child.attrib['si']][0] formula = function_map[child.attrib['si']][1] @@ -215,17 +222,7 @@ def read_cells(archive, ignore_sheets = [], ignore_hidden = False): else: cells[sheet_name + "!" 
+ cell_address] = Cell(cell_address, sheet_name, value = cell['v'], formula = cleaned_formula, should_eval=should_eval) - # if nb_hidden > 0: - # print('Ignored %i hidden cells in sheet %s' % (nb_hidden, sheet_name)) - - # print('Nb of different functions %i' % len(functions)) - # print(functions) - - # for f in functions: - # if f not in existing: - # print('== Missing function: %s' % f) - - return cells + return cells, sheets def read_rels(archive): diff --git a/koala/serializer.py b/koala/serializer.py index 27dd2d79..dec4c7ad 100644 --- a/koala/serializer.py +++ b/koala/serializer.py @@ -1,13 +1,10 @@ -from __future__ import absolute_import, print_function - import json import gzip +import logging import networkx from networkx.classes.digraph import DiGraph -from networkx.readwrite import json_graph from networkx.drawing.nx_pydot import write_dot -from openpyxl.compat import unicode from koala.Cell import Cell from koala.Range import RangeCore, RangeFactory @@ -46,15 +43,15 @@ def parse_cell_info(cell): is_named_range, is_pointer, should_eval - ]) + u"\n").encode('utf-8')) + ]) + "\n").encode('utf-8')) for cell in simple_cells: parse_cell_info(cell) value = cell.value - if isinstance(value, unicode): + if isinstance(value, str): outfile.write(cell.value.encode('utf-8') + b"\n") else: - outfile.write((str(cell.value) + u"\n").encode('utf-8')) + outfile.write((str(cell.value) + "\n").encode('utf-8')) outfile.write(b"====" + b"\n") outfile.write(b"-----" + b"\n") @@ -63,9 +60,9 @@ def parse_cell_info(cell): parse_cell_info(cell) if cell.range.is_pointer: - outfile.write((json.dumps(cell.range.reference) + u"\n").encode('utf-8')) + outfile.write((json.dumps(cell.range.reference) + "\n").encode('utf-8')) else: - outfile.write((cell.range.name + u"\n").encode('utf-8')) + outfile.write((cell.range.name + "\n").encode('utf-8')) outfile.write(b"====" + b"\n") outfile.write(b"====" + b"\n") @@ -73,15 +70,15 @@ def parse_cell_info(cell): # writing the edges 
outfile.write(b"edges" + b"\n") for source, target in self.G.edges(): - outfile.write((source.address() + SEP + target.address() + u"\n").encode('utf-8')) + outfile.write((source.address() + SEP + target.address() + "\n").encode('utf-8')) # writing the rest if self.outputs is not None: outfile.write(b"outputs" + b"\n") - outfile.write((SEP.join(self.outputs) + u"\n").encode('utf-8')) + outfile.write((SEP.join(self.outputs) + "\n").encode('utf-8')) if self.inputs is not None: outfile.write(b"inputs" + b"\n") - outfile.write((SEP.join(self.inputs) + u"\n").encode('utf-8')) + outfile.write((SEP.join(self.inputs) + "\n").encode('utf-8')) outfile.write(b"named_ranges" + b"\n") for k in self.named_ranges: outfile.write((k + SEP + self.named_ranges[k] + u"\n").encode('utf-8')) @@ -191,7 +188,7 @@ def to_float(string): G.add_nodes_from(nodes) G.add_edges_from(edges) - print("Graph loading done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) + logging.debug("Graph loading done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) return (G, cellmap, named_ranges, pointers, outputs, inputs) @@ -205,41 +202,15 @@ def dump_json(self, fname): def load_json(fname): - def _decode_list(data): - rv = [] - for item in data: - if isinstance(item, unicode) and unicode != str: - item = item.encode('utf-8') - elif isinstance(item, list) and unicode != str: - item = _decode_list(item) - elif isinstance(item, dict): - item = _decode_dict(item) - rv.append(item) - return rv - - def _decode_dict(data): - rv = {} - for key, value in data.items(): - if isinstance(key, unicode) and unicode != str: - key = key.encode('utf-8') - if isinstance(value, unicode) and unicode != str: - value = value.encode('utf-8') - elif isinstance(value, list): - value = _decode_list(value) - elif isinstance(value, dict): - value = _decode_dict(value) - rv[key] = value - return rv - with gzip.GzipFile(fname, 'r') as infile: - data = 
json.loads(infile.read().decode('utf-8'), object_hook=_decode_dict) + data = json.loads(infile.read().decode('utf-8')) return data ########### based on dot ################# -def export_to_dot(self,fname): - write_dot(self.G,fname) +def export_to_dot(self, fname): + write_dot(self.G, fname) ########### plotting ################# diff --git a/koala/tokenizer.py b/koala/tokenizer.py index 03fbdf45..acdfadf5 100644 --- a/koala/tokenizer.py +++ b/koala/tokenizer.py @@ -1,5 +1,3 @@ -from __future__ import division, print_function - # cython: profile=True #======================================================================== @@ -27,10 +25,8 @@ #======================================================================== import re -import six import collections - -from koala.utils import old_div +import logging #======================================================================== @@ -433,7 +429,7 @@ def EOF(): # standard postfix operators if ("%".find(currentChar()) != -1): if (len(token) > 0): - tokens.add(old_div(float(token), 100), self.TOK_TYPE_OPERAND) + tokens.add(float(token) / 100, self.TOK_TYPE_OPERAND) token = "" else: tokens.add('*', self.TOK_TYPE_OP_IN) @@ -501,9 +497,9 @@ def EOF(): ): pass elif (not( - ((six.next(tokens).ttype == self.TOK_TYPE_FUNCTION) and (tokens.next().tsubtype == self.TOK_SUBTYPE_START)) or - ((six.next(tokens).ttype == self.TOK_TYPE_SUBEXPR) and (tokens.next().tsubtype == self.TOK_SUBTYPE_START)) or - (six.next(tokens).ttype == self.TOK_TYPE_OPERAND) + ((next(tokens).ttype == self.TOK_TYPE_FUNCTION) and (tokens.next().tsubtype == self.TOK_SUBTYPE_START)) or + ((next(tokens).ttype == self.TOK_TYPE_SUBEXPR) and (tokens.next().tsubtype == self.TOK_SUBTYPE_START)) or + (next(tokens).ttype == self.TOK_TYPE_OPERAND) ) ): pass @@ -700,7 +696,7 @@ def shunting_yard(expression): else: tokens.append(t) - print("tokens: ", "|".join([x.tvalue for x in tokens])) + logging.debug("tokens: %s", "|".join([x.tvalue for x in tokens])) 
#http://office.microsoft.com/en-us/excel-help/calculation-operators-and-precedence-HP010078886.aspx operators = {} @@ -728,12 +724,12 @@ def shunting_yard(expression): arg_count = [] def po(): - print("output: ", "|".join([x.tvalue for x in output])) + logging.debug("output: %s", "|".join([x.tvalue for x in output])) def so(): - print("stack:", "|".join([x.tvalue for x in stack])) + logging.debug("stack: %s", "|".join([x.tvalue for x in stack])) for t in tokens: - print(t, t.type) + logging.debug("%s %s", t, t.type) if t.ttype == "operand": output.append(create_node(t)) @@ -803,7 +799,7 @@ def so(): w = were_values.pop() if w: a += 1 f.num_args = a - print(f, "has ",a," args") + logging.debug("%s has %s args", f, a) output.append(f) while stack: @@ -813,5 +809,5 @@ def so(): output.append(create_node(stack.pop())) #print "Stack is: ", "|".join(stack) - print("Output is: ", "|".join([x.tvalue for x in output])) + logging.debug("Output is: %s", "|".join([x.tvalue for x in output])) return output diff --git a/koala/utils.py b/koala/utils.py index 626f1980..242d2062 100644 --- a/koala/utils.py +++ b/koala/utils.py @@ -1,20 +1,11 @@ # cython: profile=True -from __future__ import absolute_import, division - -import collections -import numbers +import collections.abc import re import datetime as dt -try: - from functools import lru_cache -except ImportError: # fix for Python 2.7 - from backports.functools_lru_cache import lru_cache -from six import string_types +from functools import lru_cache from copy import deepcopy -from openpyxl.compat import unicode - from .ExcelError import ExcelError ASCII = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -150,12 +141,6 @@ def resolve_range(rng, should_flatten = False, sheet=''): else: pass - # `unicode` != `str` in Python2. 
See `from openpyxl.compat import unicode` - if type(sheet) == str and str != unicode: - sheet = unicode(sheet, 'utf-8') - if type(rng) == str and str != unicode: - rng = unicode(rng, 'utf-8') - key = rng+str(should_flatten)+sheet if key in resolve_range_cache: @@ -347,10 +332,10 @@ def get_linest_degree(excel,cl): return (max(degree,1),coef) def flatten(l, only_lists = False): - instance = list if only_lists else collections.Iterable + instance = list if only_lists else collections.abc.Iterable for el in l: - if isinstance(el, instance) and not isinstance(el, string_types): + if isinstance(el, instance) and not isinstance(el, str): for sub in flatten(el, only_lists = only_lists): yield sub else: @@ -587,7 +572,7 @@ def extract_numeric_values(*args): values = [] for arg in args: - if isinstance(arg, collections.Iterable) and type(arg) != list and type(arg) != tuple and type(arg) != str and type(arg) != unicode: # does not work fo other Iterable than RangeCore, but can t import RangeCore here for circular reference issues + if isinstance(arg, collections.abc.Iterable) and type(arg) != list and type(arg) != tuple and type(arg) != str: # does not work fo other Iterable than RangeCore, but can t import RangeCore here for circular reference issues values.extend([x for x in arg.values if is_number(x) and type(x) is not bool]) # for x in arg.values: # if is_number(x) and type(x) is not bool: # excludes booleans from nested ranges @@ -602,21 +587,6 @@ def extract_numeric_values(*args): return values - -def old_div(a, b): - """ - Equivalent to ``a / b`` on Python 2 without ``from __future__ import - division``. 
- - Copied from: - https://github.com/PythonCharmers/python-future/blob/master/src/past/utils/__init__.py - """ - if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral): - return a // b - else: - return a / b - - def safe_iterator(node, tag=None): """Return an iterator or an empty list""" if node is None: diff --git a/requirements.txt b/requirements.txt index c676f783..4486046f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,8 @@ -networkx==2.1 -openpyxl==2.5.3 +networkx>=2.4 +openpyxl>=3.0.3 numpy>=1.14.2 -Cython==0.28.2 -lxml==4.1.1 -six==1.11.0 +numpy-financial>=1.0.0 +Cython>=0.29.15 +lxml>=4.5.0 scipy>=1.0.0 -python-dateutil==2.8.0 -backports.functools_lru_cache==1.5 +python-dateutil>=2.8.0 diff --git a/setup.py b/setup.py index ace79353..5cf93c7c 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ packages=find_packages(), classifiers=[ - "Programming Language :: Python :: 2.7 :: 3", + "Programming Language :: Python :: 3", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 'Operating System :: Microsoft :: Windows', 'Operating System :: MacOS :: MacOS X', @@ -63,14 +63,13 @@ install_requires=[ - 'networkx >= 2.1', - 'openpyxl >= 2.5.3', + 'networkx >= 2.4', + 'openpyxl >= 3.0.3', 'numpy >= 1.14.2', - 'Cython >= 0.28.2', - 'lxml >= 4.1.1', - 'six >= 1.11.0', + 'numpy-financial>=1.0.0', + 'Cython >= 0.29.15', + 'lxml >= 4.5.0', 'scipy>=1.0.0', - 'python-dateutil==2.8.0', - 'backports.functools_lru_cache==1.5' + 'python-dateutil>=2.8.0' ] ) diff --git a/tests/ast/test_compile.py b/tests/ast/test_compile.py index 7c6992b5..37133d2c 100644 --- a/tests/ast/test_compile.py +++ b/tests/ast/test_compile.py @@ -46,7 +46,7 @@ def test_string(self): code, ast = cell2code(cell, named_ranges=[]) - assert code == u'u"hello world"' + assert code == u'"hello world"' RangeCore assert eval(code) == u"hello world" @@ -57,7 +57,7 @@ def test_string_unicode(self): code, ast = cell2code(cell, named_ranges=[]) - assert code == 
u'u"hello world ☺"' + assert code == u'"hello world ☺"' RangeCore assert eval(code) == u"hello world ☺" @@ -68,7 +68,7 @@ def test_string_quotes(self): code, ast = cell2code(cell, named_ranges=[]) - assert code == u'u"hello \\"world\'"' + assert code == u'"hello \\"world\'"' RangeCore assert eval(code) == u"hello \"world'" diff --git a/tests/ast/test_range.py b/tests/ast/test_range.py index c51be38e..3512196b 100644 --- a/tests/ast/test_range.py +++ b/tests/ast/test_range.py @@ -1,4 +1,3 @@ -from __future__ import print_function import unittest from koala.Range import RangeFactory @@ -12,12 +11,10 @@ def test_Range_Factory(self): cellmap = {'A1': 25, 'B1': 34, 'C1': 79} Range = RangeFactory(cellmap) range = Range('A1:C1') - print('RANGE', range) self.assertEqual(range[(1, 'B')], 34) def test_Range_getter(self): range = Range('Sheet1!A1:A3', [10, 20, 30]) - print('RANGE2', range) self.assertEqual(range.values, [10, 20, 30]) def test_Range_setter(self): diff --git a/tests/excel/test_excel.py b/tests/excel/test_excel.py index 28348dff..0205a48a 100644 --- a/tests/excel/test_excel.py +++ b/tests/excel/test_excel.py @@ -12,7 +12,7 @@ class Test_SharedFormula(unittest.TestCase): def setUp(self): file_name = os.path.abspath("./tests/files/SharedFormula.xlsx") archive = read_archive(file_name) - self.cells = read_cells(archive) + self.cells, _ = read_cells(archive) @unittest.skip('This test fails.') def test_nb_formulas(self): diff --git a/tests/excel/test_functions.py b/tests/excel/test_functions.py index 1d1b76a9..7be69dee 100644 --- a/tests/excel/test_functions.py +++ b/tests/excel/test_functions.py @@ -1,6 +1,5 @@ -from __future__ import absolute_import - -import pyximport; pyximport.install() +import pyximport +pyximport.install() import unittest diff --git a/tests/excel/test_utils.py b/tests/excel/test_utils.py index c36b207f..7ad16685 100644 --- a/tests/excel/test_utils.py +++ b/tests/excel/test_utils.py @@ -1,6 +1,6 @@ -from __future__ import absolute_import 
+import pyximport -import pyximport; pyximport.install() +pyximport.install() import unittest diff --git a/tox.ini b/tox.ini index 1ba3b0b3..fcee21e8 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,11 @@ [tox] -envlist = py27,py34,py35,py36 +envlist = py37,py38 recreate = True [testenv] basepython = - py27: python2.7 - py34: python3.4 - py35: python3.5 - py36: python3.6 + py37: python3.7 + py38: python3.8 deps= pytest -r{toxinidir}/requirements.txt