# dot_viz.py

# -*- coding: utf-8 -*-

import argparse
import math
import os
from PIL import Image, ImageDraw, ImageFont
from pprint import pprint
import sys

import lib.io_utils as io
import lib.list_utils as lu
import lib.math_utils as mu

# Command-line options; the parsed values are exposed on `a` under the
# upper-case `dest` names used throughout the rest of the script.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-in',
    dest="INPUT_FILE",
    default="data/processed/all_normalized.csv",
    help="File generated by normalize_data.py",
)
parser.add_argument(
    '-min',
    dest="MIN_ITEM_COUNT",
    default=100,
    type=int,
    help="Minumum value needed to not be considered 'Other'",
)
parser.add_argument(
    '-width',
    dest="IMAGE_WIDTH",
    default=1920,
    type=int,
    help="Width of image in px",
)
parser.add_argument(
    '-margin',
    dest="MARGIN",
    default=16,
    type=int,
    help="Base margin in px",
)
parser.add_argument(
    '-dotwidth',
    dest="DOT_WIDTH",
    default=4,
    type=int,
    help="Dot width in px",
)
parser.add_argument(
    '-minalpha',
    dest="MIN_ALPHA",
    default=50,
    type=int,
    help="Min dot alpha (0-255)",
)
parser.add_argument(
    '-fontsize',
    dest="FONT_SIZE",
    default=16,
    type=int,
    help="Base font size in pixels",
)
# The output pattern's %s is later filled with each collection's key
parser.add_argument(
    '-out',
    dest="OUTPUT_FILE",
    default="output/viz/%s.png",
    help="Output image file pattern",
)
a = parser.parse_args()

# Make sure output dirs exist
io.makeDirectories([a.OUTPUT_FILE])

# Title and section headings scale with the base font size
TITLE_FONT_SIZE = int(round(a.FONT_SIZE * 2.8))
SECTION_FONT_SIZE = int(round(a.FONT_SIZE * 2))

# One typeface at three sizes: group labels, section headings, image title
FONT_FILE = "fonts/Open_Sans/OpenSans-Bold.ttf"
font, fontSection, fontTitle = [
    ImageFont.truetype(font=FONT_FILE, size=fontPx)
    for fontPx in (a.FONT_SIZE, SECTION_FONT_SIZE, TITLE_FONT_SIZE)
]

# Load every row of the normalized CSV
fieldNames, items = io.readCsv(a.INPUT_FILE)
itemCount = len(items)

# One output image is produced per entry. Recognized keys:
#   key           - item field whose values define the groups of dots
#   confidenceKey - (optional) item field holding a numeric confidence; used
#                   to order the dots and to tint low-confidence dots
#   groupKey      - (optional) item field used to split the image into
#                   titled sections
#   noMinumum     - (optional) when present, small groups are never merged
#                   into the "Others" group
#   emptyLabel    - (optional) title for the group with an empty value
#                   (defaults to "Unknown")
collections = [
    {
        "key": "Acquisition Year",
        "confidenceKey": "Acquisition Year Confidence",
        "groupKey": "Acquisition Era",
    },
    {
        "key": "Acquisition Type",
        "confidenceKey": "Acquisition Type Confidence",
        "noMinumum": True,
    },
    {
        "key": "Country",
        "confidenceKey": "Country Confidence",
        "groupKey": "Region",
    },
    {
        "key": "Donor",
        "confidenceKey": "Donor Confidence",
    },
    {
        "key": "Hall",
        "emptyLabel": "Not displayed in hall",
        "noMinumum": True,
    },
]

# 9999 is the placeholder for an unknown acquisition year. Swap it for None
# while the "nalpha" values are computed, then put the placeholder back so
# later sorting and labelling still see 9999.
# NOTE(review): presumably mu.addNormalizedValues skips None values when
# normalizing - confirm against lib.math_utils.
for item in items:
    if item["Acquisition Year"] >= 9999:
        item["Acquisition Year"] = None

items = mu.addNormalizedValues(items, "Acquisition Year", "nalpha")

for item in items:
    if item["Acquisition Year"] is None:
        item["Acquisition Year"] = 9999

# Process data into sections and groups within each section.
# Each collection ends up with a "sections" list; every section holds a
# "groups" list (plus a "title" when the collection has a groupKey), and every
# group carries its sorted "items", its "count" and a display "title".
for collection in collections:
    colKey = collection["key"]

    # Break items into sections: one per distinct groupKey value (rows with an
    # empty value are left out), or a single untitled section with every item
    if "groupKey" in collection:
        groupBy = collection["groupKey"]
        sectionable = [row for row in items if groupBy in row and len(str(row[groupBy])) > 0]
        itemsBySection = sorted(lu.groupList(sectionable, groupBy), key=lambda sec: sec[groupBy])
    else:
        groupBy = None
        itemsBySection = [{"items": items, "count": itemCount}]

    # Items are ordered by descending confidence (when the collection has a
    # confidence field) and then by acquisition year
    if "confidenceKey" in collection:
        confidenceKey = collection["confidenceKey"]
        itemOrder = lambda row: (-row[confidenceKey], row["Acquisition Year"])
    else:
        itemOrder = lambda row: row["Acquisition Year"]

    sections = []
    for sectionItems in itemsBySection:
        groups = lu.groupList(sectionItems["items"], colKey, sort=True)

        if "Year" in colKey:
            # year collections are shown chronologically and never merged
            groups = sorted(groups, key=lambda grp: grp[colKey])
        else:
            # groups below the minimum size are folded into one "Others" group
            keptGroups = []
            others = {"items": [], "count": 0}
            mergedCount = 0
            for grp in groups:
                if grp["count"] >= a.MIN_ITEM_COUNT or "noMinumum" in collection:
                    keptGroups.append(grp)
                    continue
                others["items"] += grp["items"]
                others["count"] += grp["count"]
                mergedCount += 1
            others[colKey] = "%s Others" % mergedCount
            if mergedCount > 0:
                keptGroups.append(others)
            groups = keptGroups

        for grp in groups:
            grp["items"] = sorted(grp["items"], key=itemOrder)

            # empty values and the 9999 placeholder get a fallback title
            rawValue = grp[colKey]
            groupTitle = "" if rawValue is None else str(rawValue)
            if groupTitle == "9999" or len(groupTitle) < 1:
                groupTitle = collection["emptyLabel"] if "emptyLabel" in collection else "Unknown"
            grp["title"] = groupTitle

        section = {}
        if groupBy is not None:
            section["title"] = str(sectionItems[groupBy])
        section["groups"] = groups
        sections.append(section)

    collection["sections"] = sections

# Determine pixel values, positions, dimensions.
# Every item becomes one square dot. Dots are packed into square chunks of
# chunkSize dots; the formulas below leave 1px between dots and 2px between
# chunks. The result is drawData: one entry per collection, nested as
# sections -> groups -> chunks -> dots, each with coordinates relative to
# its parent.
chunkSize = 100
chunkColCount = int(math.sqrt(chunkSize))
chunkWidth = chunkColCount * a.DOT_WIDTH + (chunkColCount-1)
# Always allow at least one chunk per row so that a very narrow image cannot
# turn the chunk column count below into a zero divisor
maxChunksPerRow = max(1, int(((a.IMAGE_WIDTH - a.MARGIN * 2) + 2.0) / (2 + chunkWidth)))
drawData = []

for col in collections:
    confidenceKey = col["confidenceKey"] if "confidenceKey" in col else None
    sectionX = 0
    sectionY = TITLE_FONT_SIZE + a.MARGIN * 2
    colSections = []

    print("Calculating %s" % col["key"])

    # for each section
    for j, section in enumerate(col["sections"]):
        groupX = 0
        groupY = SECTION_FONT_SIZE + a.MARGIN * 2
        groupRowHeight = 0

        # An untitled first section starts higher up. Only the first section
        # may reset sectionY; .get() keeps untitled sections from raising.
        if j == 0 and section.get("title") is None:
            sectionY = a.MARGIN * 2

        # for each group
        sectionGroups = []
        for group in section["groups"]:
            groupItems = group["items"]
            # determine group size based on chunks (chunks form a roughly
            # square grid, capped at what fits across the image)
            chunkCount = int(math.ceil(1.0 * group["count"] / chunkSize))
            chunkCols = min(int(math.ceil(math.sqrt(chunkCount))), maxChunksPerRow)

            # for each chunk
            groupChunks = []
            for l in range(chunkCount):

                # for each dot
                chunkDots = []
                for m in range(chunkSize):
                    groupItemsIndex = l * chunkSize + m

                    if groupItemsIndex >= len(groupItems):
                        break

                    # determine pixel color: a gray level driven by "nalpha"
                    # NOTE(review): assumes mu.lerp((x, y), t) interpolates
                    # from x to y - confirm against lib.math_utils
                    item = groupItems[groupItemsIndex]
                    r = g = b = int(round(mu.lerp((255-a.MIN_ALPHA, 0), item["nalpha"])))
                    if confidenceKey is not None:
                        confidence = item[confidenceKey]
                        if confidence < 1.0:
                            if item["Acquisition Year"] >= 9999:
                                r = g = b = 0
                            # only the red channel is adjusted by confidence
                            r = int(round(mu.lerp((255.0, r), confidence)))

                    # determine position
                    # NOTE(review): the first dot sits at -1 and the first
                    # chunk at -2, so all dots start 3px before the group
                    # origin - confirm whether this offset is intentional
                    dotRow = int(1.0 * m / chunkColCount)
                    dotCol = m % chunkColCount
                    dotDrawData = {
                        "x": dotCol * a.DOT_WIDTH + dotCol-1,
                        "y": dotRow * a.DOT_WIDTH + dotRow-1,
                        "width": a.DOT_WIDTH,
                        "height": a.DOT_WIDTH,
                        "color": (r, g, b, 255)
                    }
                    chunkDots.append(dotDrawData)

                # the group's count overstated its items; nothing left to lay out
                if not chunkDots:
                    break

                chunkRow = int(1.0 * l / chunkCols)
                chunkCol = l % chunkCols
                chunkDrawData = {
                    "x": chunkCol * chunkWidth + (chunkCol-1) * 2,
                    "y": chunkRow * chunkWidth + (chunkRow-1) * 2,
                    "width": max([d["x"]+d["width"] for d in chunkDots]),
                    "height": max([d["y"]+d["height"] for d in chunkDots]),
                    "dots": chunkDots
                }
                groupChunks.append(chunkDrawData)

            # "or [0]" keeps an empty group from crashing max()
            groupWidth = max([c["x"]+c["width"] for c in groupChunks] or [0])
            groupDataWidth = groupWidth
            groupHeight = max([c["y"]+c["height"] for c in groupChunks] or [0])

            # FreeTypeFont.getsize() was removed in Pillow 10; prefer
            # getbbox() and fall back only where getbbox() does not exist
            if hasattr(font, "getbbox"):
                titleBox = font.getbbox(group["title"])
                groupTitleW = int(math.ceil(titleBox[2] - titleBox[0]))
            else:
                groupTitleW = font.getsize(group["title"])[0]
            groupWidth = max(groupWidth, groupTitleW)

            if groupWidth > (a.IMAGE_WIDTH - a.MARGIN * 2):
                print("Group width too big: %s" % groupWidth)

            # go to next row
            if groupX > 0 and (groupX + groupWidth) > (a.IMAGE_WIDTH - a.MARGIN):
                groupX = 0
                groupY += groupRowHeight + a.MARGIN * 3 + a.FONT_SIZE
                groupRowHeight = 0

            groupDrawData = {
                "x": groupX,
                "y": groupY,
                "width": groupWidth,
                "height": groupHeight,
                "dataWidth": groupDataWidth,
                "titleWidth": groupTitleW,
                "title": group["title"],
                "chunks": groupChunks
            }
            sectionGroups.append(groupDrawData)
            groupX += groupWidth + a.MARGIN
            if groupHeight > groupRowHeight:
                groupRowHeight = groupHeight

        # "or [0]" keeps a section without groups from crashing max()
        sectionWidth = max([grp["x"]+grp["width"] for grp in sectionGroups] or [0])
        sectionHeight = max([grp["y"]+grp["height"] for grp in sectionGroups] or [0])
        sectionDrawData = {
            "x": sectionX,
            "y": sectionY,
            "title": section["title"] if "title" in section else None,
            "width": sectionWidth,
            "height": sectionHeight,
            "groups": sectionGroups
        }
        sectionY += sectionHeight + a.MARGIN * 2 + SECTION_FONT_SIZE
        colSections.append(sectionDrawData)
    drawData.append({
        "title": col["key"],
        "x": a.MARGIN,
        "y": a.MARGIN,
        "sections": colSections
    })

def measureTextWidth(textFont, text):
    """Return the pixel width of `text` when rendered with `textFont`.

    FreeTypeFont.getsize() was removed in Pillow 10, so getbbox() is used
    when available; getsize() remains only as a fallback for Pillow releases
    that do not provide getbbox().
    """
    if hasattr(textFont, "getbbox"):
        left, _top, right, _bottom = textFont.getbbox(text)
        return int(math.ceil(right - left))
    return textFont.getsize(text)[0]


# Render one PNG per collection from the precomputed draw data. All stored
# coordinates are relative to their parent, so absolute positions are the sum
# of the collection, section, group, chunk and dot offsets.
for col in drawData:
    imW = a.IMAGE_WIDTH
    # "or [0]" keeps a collection without sections from crashing max()
    sectionBottoms = [s["y"]+s["height"] for s in col["sections"]]
    imH = max(sectionBottoms or [0]) + a.MARGIN + TITLE_FONT_SIZE + a.MARGIN * 2
    im = Image.new('RGBA', (imW, imH), (255,255,255,0))
    title = col["title"]
    imageFn = a.OUTPUT_FILE % title

    draw = ImageDraw.Draw(im)

    # draw title, horizontally centered
    titleW = measureTextWidth(fontTitle, title)
    titleX = int(round((imW - titleW) * 0.5))
    titleY = col["y"]
    draw.text((titleX, titleY), title, font=fontTitle, fill=(0,0,0,255))

    for section in col["sections"]:

        # draw section title, horizontally centered
        if "title" in section and section["title"] is not None:
            sTitleW = measureTextWidth(fontSection, section["title"])
            sTitleX = int(round((imW - sTitleW) * 0.5))
            sTitleY = col["y"]+section["y"]
            draw.text((sTitleX, sTitleY), section["title"], font=fontSection, fill=(0,0,0,255))

        for group in section["groups"]:
            # draw group title: centered over the dots, or left-aligned when
            # the title is wider than the dots
            gTitleW = group["titleWidth"]
            gWidth = group["width"]
            gTitleX = col["x"]+section["x"]+group["x"] + int(round((gWidth - gTitleW) * 0.5))
            if group["dataWidth"] < gWidth:
                gTitleX = col["x"]+section["x"]+group["x"]
            gTitleY = col["y"]+section["y"]+group["y"]
            draw.text((gTitleX, gTitleY), group["title"], font=font, fill=(0,0,0,255))

            # draw dots, shifted down to leave room for the group title
            for chunk in group["chunks"]:
                for dot in chunk["dots"]:
                    x0 = col["x"]+section["x"]+group["x"]+chunk["x"]+dot["x"]
                    y0 = col["y"]+section["y"]+group["y"]+chunk["y"]+dot["y"] + a.FONT_SIZE + a.MARGIN
                    draw.rectangle([x0, y0, x0+dot["width"]-1, y0+dot["height"]-1], fill=dot["color"])

    im.save(imageFn, "PNG")
    print("Saved %s" % imageFn)