Skip to content

Commit

Permalink
Merge pull request #669 from chanzuckerberg/joyce/css4-colors
Browse files Browse the repository at this point in the history
fix: allow duplicate colors and use matplotlib for css4 named colors
  • Loading branch information
joyceyan authored Oct 13, 2023
2 parents 4b3d5b0 + 5945291 commit 04413ad
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 167 deletions.
164 changes: 5 additions & 159 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Mapping, Optional, Union
from typing import Dict, List, Mapping, Optional, Union

import anndata
import matplotlib.colors as mcolors
import numpy as np
import pandas as pd
from pandas.core.computation.ops import UndefinedVariableError
Expand Down Expand Up @@ -772,175 +773,20 @@ def _validate_colors_in_uns_dict(self, uns_dict: dict) -> None:
f"Colors in uns[{key}] must be strings. Found: {value} which are {value.dtype.name}"
)
continue
# 4. Verify that we have at least as many unique colors as unique values in the corresponding categorical field
value = np.unique(value)
# 4. Verify that we have at least as many colors as unique values in the corresponding categorical field
if len(value) < obs_unique_values:
self.errors.append(
f"Annotated categorical field {key.replace('_colors', '')} must have at least {obs_unique_values} color options "
f"in uns[{key}]. Found: {value}"
)
# 5. Verify that either all colors are hex OR all colors are CSS4 named colors strings
all_hex_colors = all((self._validate_hex_color(color) for color in value))
all_css4_colors = all((self._validate_css4_color(color) for color in value))
all_hex_colors = all(re.match(r"^#([0-9a-fA-F]{6})$", color) for color in value)
all_css4_colors = all(color in mcolors.CSS4_COLORS for color in value)
if not (all_hex_colors or all_css4_colors):
self.errors.append(
f"Colors in uns[{key}] must be either all hex colors or all CSS4 named colors. Found: {value}"
)

def _validate_css4_color(self, color: Any) -> bool:
if not isinstance(color, str):
return False
css4_named_colors = [
"aliceblue",
"antiquewhite",
"aqua",
"aquamarine",
"azure",
"beige",
"bisque",
"black",
"blanchedalmond",
"blue",
"blueviolet",
"brown",
"burlywood",
"cadetblue",
"chartreuse",
"chocolate",
"coral",
"cornflowerblue",
"cornsilk",
"crimson",
"cyan",
"darkblue",
"darkcyan",
"darkgoldenrod",
"darkgray",
"darkgreen",
"darkkhaki",
"darkmagenta",
"darkolivegreen",
"darkorange",
"darkorchid",
"darkred",
"darksalmon",
"darkseagreen",
"darkslateblue",
"darkslategray",
"darkturquoise",
"darkviolet",
"deeppink",
"deepskyblue",
"dimgray",
"dodgerblue",
"firebrick",
"floralwhite",
"forestgreen",
"fuchsia",
"gainsboro",
"ghostwhite",
"gold",
"goldenrod",
"gray",
"green",
"greenyellow",
"grey",
"honeydew",
"hotpink",
"indianred",
"indigo",
"ivory",
"khaki",
"lavender",
"lavenderblush",
"lawngreen",
"lemonchiffon",
"lightblue",
"lightcoral",
"lightcyan",
"lightgoldenrodyellow",
"lightgray",
"lightgreen",
"lightpink",
"lightsalmon",
"lightseagreen",
"lightskyblue",
"lightslategray",
"lightsteelblue",
"lightyellow",
"lime",
"limegreen",
"linen",
"magenta",
"maroon",
"mediumaquamarine",
"mediumblue",
"mediumorchid",
"mediumpurple",
"mediumseagreen",
"mediumslateblue",
"mediumspringgreen",
"mediumturquoise",
"mediumvioletred",
"midnightblue",
"mintcream",
"mistyrose",
"moccasin",
"navajowhite",
"navy",
"oldlace",
"olive",
"olivedrab",
"orange",
"orangered",
"orchid",
"palegoldenrod",
"palegreen",
"paleturquoise",
"palevioletred",
"papayawhip",
"peachpuff",
"peru",
"pink",
"plum",
"powderblue",
"purple",
"rebeccapurple",
"red",
"rosybrown",
"royalblue",
"saddlebrown",
"salmon",
"sandybrown",
"seagreen",
"seashell",
"sienna",
"silver",
"skyblue",
"slateblue",
"slategray",
"snow",
"springgreen",
"steelblue",
"tan",
"teal",
"thistle",
"tomato",
"turquoise",
"violet",
"wheat",
"white",
"whitesmoke",
"yellow",
"yellowgreen",
]
return color in css4_named_colors

def _validate_hex_color(self, color: Any) -> bool:
if not isinstance(color, str):
return False
return re.match(r"^#([0-9a-fA-F]{6})$", color)

def _validate_sparsity(self):
"""
calculates sparsity of x and raw.x, if bigger than indicated in the schema and not a scipy sparse matrix, then
Expand Down
1 change: 1 addition & 0 deletions cellxgene_schema_cli/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ PyYaml==6.0
wheel==0.40.0
semver==3.0.0
xxhash==3.3.0
matplotlib==3.7.3
13 changes: 5 additions & 8 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -1585,6 +1585,11 @@ def test_colors_happy_path_no_column_def(self, validator_with_adata):
validator.adata.uns["test_column_colors"] = numpy.array(["#000000", "#ffffff"])
assert validator.validate_adata()

def test_colors_happy_path_duplicates(self, validator_with_adata):
    """A colors array that repeats the same named color is expected to validate."""
    duplicated_colors = numpy.array(["lightgrey", "lightgrey"])
    validator_with_adata.adata.uns["suspension_type_colors"] = duplicated_colors
    assert validator_with_adata.validate_adata()

def test_colors_not_numpy_array(self, validator_with_adata):
validator = validator_with_adata
validator.adata.uns["suspension_type_colors"] = ["green", "purple"]
Expand Down Expand Up @@ -1627,14 +1632,6 @@ def test_not_enough_color_options(self, validator_with_adata):
"ERROR: Annotated categorical field suspension_type must have at least 2 color options in uns[suspension_type_colors]. Found: ['green']"
]

def test_not_enough_unique_color_options(self, validator_with_adata):
    """Two identical colors are expected to be reported as too few color options."""
    expected_errors = [
        "ERROR: Annotated categorical field suspension_type must have at least 2 color options in uns[suspension_type_colors]. Found: ['green']"
    ]
    validator_with_adata.adata.uns["suspension_type_colors"] = numpy.array(["green", "green"])
    validator_with_adata.validate_adata()
    assert validator_with_adata.errors == expected_errors

def test_different_color_types(self, validator_with_adata):
validator = validator_with_adata
validator.adata.uns["suspension_type_colors"] = numpy.array(["#000000", "pink"])
Expand Down

0 comments on commit 04413ad

Please sign in to comment.