Skip to content
This repository has been archived by the owner on Sep 5, 2022. It is now read-only.

Commit

Permalink
Improve matching algorithm
Browse files Browse the repository at this point in the history
* Consider transaction code when matching payments
* Use transaction code when matching payments
* Combine loading mechanism in 'merge_data' function
* Add ZIP file extraction for invoices - fixes #6
* Add development directories to .gitignore
* Rename 'convert_cost' to 'convert_number', as it is no longer limited to currency
* Fix cases of duplicate entries
* Fix regular payments being processed/imported
  • Loading branch information
S1SYPHOS committed May 16, 2021
1 parent d4bba2f commit e813590
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 75 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Project-related
src/
dist/
imports/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,12 @@ export_dir = ./dist # generated spreadsheets & graphs

# Regexes for import files
[regexes]
payment_regex = Download*.CSV # as exported by PayPal™
order_regex = Orders_*.csv # as exported by Shopkonfigurator
info_regex = OrdersInfo_*.csv # as exported by Shopkonfigurator
# (1) .. exported by PayPal™
payment_regex = Download*.CSV
# (2) .. exported by Shopkonfigurator
order_regex = Orders_*.csv
info_regex = OrdersInfo_*.csv
invoice_regex = *_Invoices_TimeFrom*_TimeTo*.zip
```

As you can see, many config options refer to the directory from which `knvcli` is being called.
Expand Down
4 changes: 2 additions & 2 deletions knv_cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ~*~ coding=utf-8 ~*~


from configparser import SafeConfigParser
from os.path import basename, join

import click
Expand All @@ -14,6 +13,7 @@
from .utils import dump_csv, load_json
from .utils import build_path, create_path, group_data


clickpath = click.Path(exists=True)
pass_config = click.make_pass_decorator(Config, ensure=True)

Expand All @@ -25,7 +25,7 @@
@click.option('--import-dir', type=clickpath, help='Custom import directory.')
@click.option('--export-dir', type=clickpath, help='Custom export directory.')
def cli(config, verbose, data_dir, import_dir, export_dir):
"""Tools for handling KNV data"""
"""CLI utility for handling data exported from KNV & pcbis.de"""

# Apply CLI options
if verbose is not None:
Expand Down
7 changes: 4 additions & 3 deletions knv_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from configparser import SafeConfigParser
from os import getcwd
from os.path import isfile, join, realpath
from os.path import isfile, join

from xdg import xdg_config_home, xdg_data_home

Expand All @@ -26,10 +26,11 @@ def __init__(self):
'payment_regex': 'Download*.CSV',
'order_regex': 'Orders_*.csv',
'info_regex': 'OrdersInfo_*.csv',
'invoice_regex': '*_Invoices_TimeFrom*_TimeTo*.zip',
}

# Load config provided by user
config_file = realpath(join(xdg_config_home(), 'knv-cli', 'config'))
config_file = join(xdg_config_home(), 'knv-cli', 'config')

if isfile(config_file):
config.read(config_file)
Expand All @@ -56,7 +57,7 @@ def __init__(self):
# Load blocklist if one exists
self.blocklist = []

block_file = realpath(join(getcwd(), 'blocklist.txt'))
block_file = join(getcwd(), 'blocklist.txt')

if isfile(block_file):
with open(block_file, 'r') as file:
Expand Down
77 changes: 37 additions & 40 deletions knv_cli/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@


from os import remove
from os.path import join
from os.path import basename, join
from operator import itemgetter
from shutil import move
from zipfile import ZipFile

from .processors.paypal import process_payments
from .processors.shopkonfigurator import process_orders, process_infos
Expand Down Expand Up @@ -46,18 +47,7 @@ def import_payments(self) -> None:
payments = load_json(db_files)

# Compare existing & imported data if database was built before ..
if payments:
# Populate set with identifiers
codes = {payment['ID'] for payment in payments}

# Merge only data not already in database
for item in import_data:
if item['ID'] not in codes:
payments.append(item)

# .. otherwise, start from scratch
else:
payments = import_data
payments = self.merge_data(payments, import_data, 'Transaktion')

# Sort payments by date
payments.sort(key=itemgetter('Datum'))
Expand All @@ -84,18 +74,7 @@ def import_orders(self) -> None:
orders = load_json(db_files)

# Compare existing & imported data if database was built before ..
if orders:
# Populate set with identifiers
codes = {order['ID'] for order in orders}

# Merge only data not already in database
for item in import_data:
if item['ID'] not in codes:
orders.append(item)

# .. otherwise, start from scratch
else:
orders = import_data
orders = self.merge_data(orders, import_data, 'ID')

# Sort orders by date
orders.sort(key=itemgetter('Datum'))
Expand All @@ -122,18 +101,7 @@ def import_infos(self) -> None:
infos = load_json(db_files)

# Compare existing & imported data if database was built before ..
if infos:
# Populate set with identifiers
codes = {info['ID'] for info in infos}

# Merge only data not already in database
for item in import_data:
if item['ID'] not in codes:
infos.append(item)

# .. otherwise, start from scratch
else:
infos = import_data
infos = self.merge_data(infos, import_data, 'ID')

# Sort infos by date
infos.sort(key=itemgetter('Datum'))
Expand All @@ -145,8 +113,37 @@ def import_infos(self) -> None:

def import_invoices(self) -> None:
    """Extract invoices from imported ZIP archives into the invoice directory.

    Archives are selected from the import directory using the configured
    'invoice_regex'. Archive members whose name equals the basename of a
    PDF already found in the invoice directory are skipped.

    Raises:
        Exception: If an archive cannot be opened or extracted; the
            underlying error is attached as the cause.
    """
    # Select invoice files to be imported
    invoice_files = build_path(self.config.import_dir, self.config.invoice_regex)

    # Check invoices currently in database
    invoices = {
        basename(invoice)
        for invoice in build_path(self.config.invoice_dir, '*.pdf')
    }

    # Extract them
    for invoice_file in invoice_files:
        try:
            with ZipFile(invoice_file) as archive:
                for zipped_invoice in archive.namelist():
                    # Import only invoices not already in database
                    if zipped_invoice not in invoices:
                        archive.extract(zipped_invoice, self.config.invoice_dir)

                        # Remember it, so later archives skip the same invoice
                        invoices.add(zipped_invoice)

        except Exception as error:
            # Keep the generic exception type, but name the failing archive
            # and preserve the original error instead of discarding it
            raise Exception(f'Failed to import invoices from "{invoice_file}"') from error


def merge_data(self, data, import_data: list, identifier: str) -> list:
    """Merge imported items into existing data, skipping known identifiers.

    Args:
        data: Items already in the database; may be empty or None.
        import_data: Newly imported items.
        identifier: Key whose value uniquely identifies an item.

    Returns:
        The existing items followed by every imported item whose
        identifier was not seen before. A non-empty 'data' list is
        extended in place; 'import_data' is never modified or returned.
    """
    # Start from scratch if database was not built before ..
    if not data:
        data = []

    # Populate set with identifiers
    codes = {item[identifier] for item in data}

    # Merge only data not already in database; this also drops
    # duplicates within the imported data itself
    for item in import_data:
        code = item[identifier]

        if code not in codes:
            codes.add(code)
            data.append(item)

    return data
30 changes: 25 additions & 5 deletions knv_cli/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,28 @@ def match_dates(base_date, test_date, days=1) -> bool:
def match_orders(payment, orders) -> dict:
candidates = []

for item in orders:
costs_match = payment['Brutto'] == item['Betrag']
dates_match = match_dates(payment['Datum'], item['Datum'])
for order in orders:
# Skip payments other than PayPal™
if order['Abwicklung']['Zahlungsart'].lower() != 'paypal':
continue

# Determine matching transaction code ..
if order['Abwicklung']['Transaktionscode'] != 'keine Angabe':
if order['Abwicklung']['Transaktionscode'] == payment['Transaktion']:
# .. in which case there's a one-to-one match
return order

# .. otherwise, why bother
# TODO: In the future, this might be the way to go,
# simply because PayPal always includes a transaction code ..
#
# .. BUT the algorithm could be used for other payments
#
# else:
# continue

costs_match = payment['Brutto'] == order['Betrag']
dates_match = match_dates(payment['Datum'], order['Datum'])

if costs_match and dates_match:
# Let them fight ..
Expand All @@ -67,7 +86,8 @@ def match_orders(payment, orders) -> dict:
# Determine chance of match for given payment & order
# (1) Split by whitespace
payment_name = payment['Name'].split(' ')
order_name = item['Name'].split(' ')
order_name = order['Name'].split(' ')

# (2) Take first list item as first name, last list item as last name
payment_first, payment_last = payment_name[0], payment_name[-1]
order_first, order_last = order_name[0], order_name[-1]
Expand All @@ -80,7 +100,7 @@ def match_orders(payment, orders) -> dict:
if payment_last.lower() == order_last.lower():
hits += 2

candidates.append((hits, item))
candidates.append((hits, order))

matches = sorted(candidates, key=itemgetter(0), reverse=True)

Expand Down
6 changes: 3 additions & 3 deletions knv_cli/processors/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ def convert_date(string: str) -> str:
return datetime.strptime(string, '%d.%m.%Y').strftime('%Y-%m-%d')


def convert_cost(string) -> str:
if isinstance(string, float):
string = str(string)
def convert_number(string) -> str:
# Convert integers & floats
string = str(string)

string = float(string.replace(',', '.'))
integer = f'{string:.2f}'
Expand Down
14 changes: 9 additions & 5 deletions knv_cli/processors/paypal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# This module contains functions for processing 'Aktivitäten'
# See https://www.paypal.com/de/smarthelp/article/FAQ1007

from .helpers import convert_cost, convert_date
from .helpers import convert_number, convert_date


# Processes 'Download*.CSV' files
Expand All @@ -13,6 +13,10 @@ def process_payments(data) -> list:
payments = []

for item in data:
# Skip regular payments
if item['Typ'] == 'Allgemeine Zahlung':
continue

# Skip withdrawals
if item['Brutto'][:1] == '-':
continue
Expand All @@ -22,14 +26,14 @@ def process_payments(data) -> list:

payment = {}

payment['ID'] = code
payment['Transaktion'] = code
payment['Datum'] = convert_date(item['Datum'])
payment['Vorgang'] = 'nicht zugeordnet'
payment['Name'] = item['Name']
payment['Email'] = item['Absender E-Mail-Adresse']
payment['Brutto'] = convert_cost(item['Brutto'])
payment['Gebühr'] = convert_cost(item['Gebühr'])
payment['Netto'] = convert_cost(item['Netto'])
payment['Brutto'] = convert_number(item['Brutto'])
payment['Gebühr'] = convert_number(item['Gebühr'])
payment['Netto'] = convert_number(item['Netto'])
payment['Währung'] = item['Währung']

if code not in codes:
Expand Down
40 changes: 30 additions & 10 deletions knv_cli/processors/shopkonfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# This module contains functions for processing 'Auftragsdaten'
# See http://www.knv-info.de/wp-content/uploads/2020/04/Auftragsdatenexport2.pdf

from .helpers import convert_cost, convert_date
from .helpers import convert_number, convert_date


# Processes 'Orders_*.csv' files
Expand All @@ -22,10 +22,13 @@ def process_orders(order_data) -> list:
# .. and - more often than not - formatted as floats with a trailing zero
clean_isbn = str(clean_isbn).replace('.0', '')

# Populate set with identifiers
codes = {order for order in orders.keys()}

# Assign identifier
code = item['ormorderid']

if code not in orders.keys():
if code not in codes:
order = {}

order['ID'] = code
Expand All @@ -35,18 +38,31 @@ def process_orders(order_data) -> list:
order['Nachname'] = item['rechnungaddresslastname']
order['Name'] = ' '.join([item['rechnungaddressfirstname'], item['rechnungaddresslastname']])
order['Email'] = item['rechnungaddressemail']
order['Bestellung'] = {clean_isbn: item['quantity']}
order['Betrag'] = convert_cost(item['totalordercost'])
order['Bestellung'] = {'Summe': item['totalproductcost']}
order['Versand'] = convert_number(item['totalshipping'])
order['Betrag'] = convert_number(item['totalordercost'])
order['Währung'] = item['currency']
order['Abwicklung'] = {'Zahlungsart': 'keine Angabe', 'Transaktionscode': 'keine Angabe'}

orders[code] = order
codes.add(code)

else:
if clean_isbn not in orders[code]['Bestellung'].keys():
orders[code]['Bestellung'][clean_isbn] = item['quantity']
# Add information about each purchased article
orders[code]['Bestellung'][clean_isbn] = {
'Anzahl': int(item['quantity']),
'Preis': convert_number(item['orderitemunitprice']),
'Steuersatz': convert_number(item['vatpercent']),
'Steueranteil': convert_number(item['vatprice']),
}

else:
orders[code]['Bestellung'][clean_isbn] = orders[code]['Bestellung'][clean_isbn] + item['quantity']
# Add information about ..
# (1) .. method of payment
if str(item['paymenttype']) != 'nan':
orders[code]['Abwicklung']['Zahlungsart'] = item['paymenttype']

# (2) .. transaction number (Paypal™ only)
if str(item['transactionid']) != 'nan':
orders[code]['Abwicklung']['Transaktionscode'] = str(item['transactionid'])

return list(orders.values())

Expand All @@ -62,10 +78,13 @@ def process_infos(info_data) -> list:
if str(item['Invoice Number']) != 'nan':
clean_number = str(item['Invoice Number']).replace('.0', '')

# Populate set with identifiers
codes = {info for info in infos.keys()}

# Assign identifier
code = item['OrmNumber']

if code not in infos.keys():
if code not in codes:
info = {}

info['ID'] = code
Expand All @@ -75,6 +94,7 @@ def process_infos(info_data) -> list:
if clean_number:
info['Rechnungen'].append(clean_number)

codes.add(code)
infos[code] = info

else:
Expand Down
6 changes: 2 additions & 4 deletions knv_cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,8 @@ def create_path(file_path) -> None:
makedirs(dirname(file_path))

# Guard against race condition
except OSError as e:
# pylint: disable=undefined-variable
if e.errno != errno.EEXIST:
raise
except OSError:
pass


def dedupe(duped_data, encoding='utf-8') -> list:
Expand Down

0 comments on commit e813590

Please sign in to comment.