Skip to content

Commit

Permalink
refactor(perf): Don't recreate path mapping on every attribute access
Browse files (browse the repository at this point in the history)
  • Loading branch information
dangotbanned committed Nov 7, 2024
1 parent f1d610c commit c4ef112
Showing 1 changed file with 12 additions and 12 deletions.
24 changes: 12 additions & 12 deletions tools/datasets/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

import json
import types
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal

Expand Down Expand Up @@ -72,6 +73,13 @@ def __init__(
output_dir, name_tags=tags_gh, name_trees=trees_gh, **kwds_gh
)
self._npm: Npm = Npm(output_dir, name_tags=tags_npm, **kwds_npm)
self._paths = types.MappingProxyType["_PathAlias", Path](
{
"npm_tags": self.npm._paths["tags"],
"gh_tags": self.github._paths["tags"],
"gh_trees": self.github._paths["trees"],
}
)

@property
def github(self) -> GitHub:
Expand All @@ -81,23 +89,15 @@ def github(self) -> GitHub:
def npm(self) -> Npm:
return self._npm

@property
def _aliases(self) -> dict[_PathAlias, Path]:
return {
"npm_tags": self.npm._paths["tags"],
"gh_tags": self.github._paths["tags"],
"gh_trees": self.github._paths["trees"],
}

def refresh(self) -> pl.DataFrame:
npm_tags = self.npm.tags()
self.write_parquet(npm_tags, self.npm._paths["tags"])
self.write_parquet(npm_tags, self._paths["npm_tags"])

gh_tags = self.github.refresh_tags(npm_tags)
self.write_parquet(gh_tags, self.github._paths["tags"])
self.write_parquet(gh_tags, self._paths["gh_tags"])

gh_trees = self.github.refresh_trees(gh_tags)
self.write_parquet(gh_trees, self.github._paths["trees"])
self.write_parquet(gh_trees, self._paths["gh_trees"])
return gh_trees

def read(self, name: _PathAlias, /) -> pl.DataFrame:
Expand All @@ -113,7 +113,7 @@ def _from_alias(self, name: _PathAlias, /) -> Path:
msg = f'Expected one of {["npm_tags", "gh_tags", "gh_trees"]!r}, but got: {name!r}'
raise TypeError(msg)
else:
return self._aliases[name]
return self._paths[name]

def write_parquet(self, frame: pl.DataFrame | pl.LazyFrame, fp: Path, /) -> None:
"""Write ``frame`` to ``fp``, with some extra safety."""
Expand Down

0 comments on commit c4ef112

Please sign in to comment.