From 9789a80d422714bceee3123f1f4573a88b8c77df Mon Sep 17 00:00:00 2001 From: Mikhail Tavarez Date: Sun, 3 Nov 2024 13:00:07 -0600 Subject: [PATCH] Add `expandvars` and respective tests. Added lit to pixi.toml, and `magic run` to pre-commit configuration. squash code a bit by using write add tests Cleanup with formatting and sign add missing newline Signed-off-by: Mikhail Tavarez updated changelog --- .pre-commit-config.yaml | 6 +- docs/changelog.md | 3 + magic.lock | 18 +++ pixi.toml | 1 + stdlib/src/os/path/__init__.mojo | 1 + stdlib/src/os/path/path.mojo | 156 ++++++++++++++++++++++- stdlib/test/os/path/test_expandvars.mojo | 80 ++++++++++++ 7 files changed, 261 insertions(+), 4 deletions(-) create mode 100644 stdlib/test/os/path/test_expandvars.mojo diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9ca3f26be67..ee19aa25527 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,19 +3,19 @@ repos: hooks: - id: mojo-format name: mojo-format - entry: mojo format + entry: magic run mojo format language: system files: '\.(mojo|🔥|py)$' stages: [commit] - id: check-docstrings name: check-docstrings - entry: python3 ./stdlib/scripts/check-docstrings.py + entry: magic run python3 ./stdlib/scripts/check-docstrings.py language: system pass_filenames: false stages: [commit] - id: check-license name: check-license - entry: mojo stdlib/scripts/check_licenses.mojo + entry: magic run mojo stdlib/scripts/check_licenses.mojo language: system files: '\.(mojo|🔥|py)$' stages: [commit] diff --git a/docs/changelog.md b/docs/changelog.md index ebd5e3e49bc..31857cf2fed 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -228,6 +228,9 @@ what we publish. of variables that are handled as synthetic types, e.g. `List` from Mojo or `std::vector` from C++. +- Added `os.path.expandvars` to expand environment variables in a string. + ([PR #3735](https://github.com/modularml/mojo/pull/3735) by [@thatstoasty](https://github.com/thatstoasty)). 
+ ### 🦋 Changed - More things have been removed from the auto-exported set of entities in the `prelude` diff --git a/magic.lock b/magic.lock index 7ec6eea750a..48c92a3486e 100644 --- a/magic.lock +++ b/magic.lock @@ -119,6 +119,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.13.4-h064dc61_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lit-19.1.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-3.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py312h178313f_0.conda @@ -321,6 +322,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxml2-2.13.4-hf4efe5d_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.1-h86ecc28_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lit-19.1.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lz4-c-1.9.4-hd600fc2_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-3.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/markupsafe-3.0.2-py312h74ce7d3_0.conda @@ -512,6 +514,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libuv-1.49.2-h7ab814d_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxml2-2.13.4-h8424949_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.1-h8359307_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lit-19.1.3-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-19.1.3-hb52a8e5_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lz4-c-1.9.4-hb7217d7_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-3.0.0-pyhd8ed1ab_0.conda @@ -5229,6 +5232,21 @@ packages: license_family: Other size: 60963 timestamp: 1727963148474 +- kind: conda + name: lit + version: 19.1.3 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/lit-19.1.3-pyhd8ed1ab_0.conda + sha256: 7ec1944ef3a2bd3a163d2667f36c490d58d5389c062a5a836cc634d4ea0a95fb + md5: b929a6ecc1027275015c2acffdceaeb8 + depends: + - python >=3 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 128702 + timestamp: 1730301422924 - kind: conda name: llvm-openmp version: 19.1.3 diff --git a/pixi.toml b/pixi.toml index 8dddbc49006..7194b502096 100644 --- a/pixi.toml +++ b/pixi.toml @@ -11,4 +11,5 @@ benchmarks = { cmd = ["./stdlib/scripts/run-benchmarks.sh"], env = { MODULAR_MOJ [dependencies] python = ">=3.9,<3.13" +lit = "*" max = "*" diff --git a/stdlib/src/os/path/__init__.mojo b/stdlib/src/os/path/__init__.mojo index ac7d4aa8ecb..68097ee4db9 100644 --- a/stdlib/src/os/path/__init__.mojo +++ b/stdlib/src/os/path/__init__.mojo @@ -15,6 +15,7 @@ from .path import ( dirname, exists, expanduser, + expandvars, getsize, isdir, isfile, diff --git a/stdlib/src/os/path/path.mojo b/stdlib/src/os/path/path.mojo index 8cacd9af548..fcd502433cb 100644 --- a/stdlib/src/os/path/path.mojo +++ b/stdlib/src/os/path/path.mojo @@ -19,9 +19,10 @@ from os.path import isdir ``` """ -from collections import List +from collections import List, InlineArray from stat import S_ISDIR, S_ISLNK, S_ISREG from sys import has_neon, os_is_linux, os_is_macos, os_is_windows +from utils import Span, StringSlice from .. 
import PathLike
 from .._linux_aarch64 import _lstat as _lstat_linux_arm
@@ -390,3 +391,161 @@ def split[PathLike: os.PathLike, //](path: PathLike) -> (String, String):
 #         paths_str.append(cur_path[].__fspath__())
 
 #     return join(path.__fspath__(), *paths_str)
+
+
+# ===----------------------------------------------------------------------=== #
+# expandvars
+# ===----------------------------------------------------------------------=== #
+
+
+fn _is_shell_special_variable(byte: Byte) -> Bool:
+    """Checks if `$` + `byte` identifies a special shell variable, such as `$@`.
+
+    Args:
+        byte: The byte to check.
+
+    Returns:
+        True if the byte is a special shell variable and False otherwise.
+    """
+    alias shell_variables = InlineArray[Int, 17](
+        ord("*"),
+        ord("#"),
+        ord("$"),
+        ord("@"),
+        ord("!"),
+        ord("?"),
+        ord("-"),
+        ord("0"),
+        ord("1"),
+        ord("2"),
+        ord("3"),
+        ord("4"),
+        ord("5"),
+        ord("6"),
+        ord("7"),
+        ord("8"),
+        ord("9"),
+    )
+    return int(byte) in shell_variables
+
+
+fn _is_alphanumeric(byte: Byte) -> Bool:
+    """Checks if `byte` is an ASCII letter, number, or underscore.
+
+    Args:
+        byte: The byte to check.
+
+    Returns:
+        True if the byte is an ASCII letter, number, or underscore and False otherwise.
+    """
+    var b = int(byte)
+    return (
+        b == ord("_")
+        or ord("0") <= b
+        and b <= ord("9")
+        or ord("a") <= b
+        and b <= ord("z")
+        or ord("A") <= b
+        and b <= ord("Z")
+    )
+
+
+fn _parse_variable_name(bytes: Span[Byte]) -> Tuple[String, Int]:
+    """Returns the environment variable name and the byte count required to extract it.
+    For `${}` expansions, two additional bytes are added to the byte count to account for the braces.
+
+    Invalid syntax is reported with a marker instead of a name: `${}` (with a
+    byte count of 2) for empty braces and `${` (with a byte count of 1) for an
+    unterminated brace.
+
+    Args:
+        bytes: The bytes following a `$`. Must not be empty.
+
+    Returns:
+        The environment variable name and the byte count required to extract it.
+    """
+    if bytes[0] == ord("{"):
+        if (
+            len(bytes) > 2
+            and _is_shell_special_variable(bytes[1])
+            and bytes[2] == ord("}")
+        ):
+            return String(StringSlice(unsafe_from_utf8=bytes[1:2])), 3
+
+        # Scan until the closing brace or the end of the bytes.
+        var i = 1
+        while i < len(bytes):
+            if bytes[i] == ord("}"):
+                if i == 1:
+                    return String("${}"), 2
+                return String(StringSlice(unsafe_from_utf8=bytes[1:i])), i + 1
+            i += 1
+        return String("${"), 1
+    elif _is_shell_special_variable(bytes[0]):
+        return String(StringSlice(unsafe_from_utf8=bytes[0:1])), 1
+
+    # Scan until we hit an invalid character in environment variable names.
+    var i = 0
+    while i < len(bytes) and _is_alphanumeric(bytes[i]):
+        i += 1
+
+    return String(StringSlice(unsafe_from_utf8=bytes[:i])), i
+
+
+fn expandvars[PathLike: os.PathLike, //](path: PathLike) -> String:
+    """Replaces `${var}` or `$var` in the path with values from the current environment variables.
+    Undefined variables, special shell variables (such as `$@` or `$$`) and
+    invalid syntax (such as `${}`) are left unchanged.
+
+    Parameters:
+        PathLike: The type conforming to the os.PathLike trait.
+
+    Args:
+        path: The path to expand.
+
+    Returns:
+        The input path with environment variables expanded.
+    """
+    var path_str = path.__fspath__()
+    var bytes = path_str.as_bytes()
+    var buf = String()
+
+    # Byte scanning should be fine, ${} is ASCII.
+    # `i` marks the start of the text not yet copied to `buf`, `j` is the cursor.
+    i = 0
+    j = 0
+    while j < len(bytes):
+        if bytes[j] == ord("$") and j + 1 < len(bytes):
+            if not buf:
+                buf._buffer.reserve(new_capacity=2 * len(bytes))
+            buf.write_bytes(bytes[i:j])
+
+            name, length = _parse_variable_name(bytes[j + 1 :])
+
+            # Only a plain name is looked up. A lone `$` (empty name), the
+            # `${` / `${}` invalid syntax markers and special shell variables
+            # are never expanded.
+            var value = String()
+            if (
+                name != ""
+                and not name.startswith("$")
+                and not _is_shell_special_variable(name.as_bytes()[0])
+            ):
+                value = os.getenv(name)
+
+            # No value: copy the `$` and everything the parser consumed as is,
+            # so that `$$`, `${@}` or `${1abc}` are not truncated.
+            if value:
+                buf.write(value)
+            else:
+                buf.write_bytes(bytes[j : j + length + 1])
+
+            j += length
+            i = j + 1
+        j += 1
+
+    if not buf:
+        return path_str
+
+    buf.write_bytes(bytes[i:])
+    return buf
diff --git a/stdlib/test/os/path/test_expandvars.mojo b/stdlib/test/os/path/test_expandvars.mojo
new file mode 100644
index 00000000000..e661675b6aa
--- /dev/null
+++ b/stdlib/test/os/path/test_expandvars.mojo
@@ -0,0 +1,80 @@
+# ===----------------------------------------------------------------------=== #
+# Copyright (c) 2024, Modular Inc. All rights reserved.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions:
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===----------------------------------------------------------------------=== #
+# RUN: %mojo %s
+
+import os
+from os.path import expandvars
+from testing import assert_equal
+
+
+def test_expansion():
+    _ = os.setenv("TEST_VAR", "World")
+    assert_equal(expandvars("Hello $TEST_VAR!"), "Hello World!")
+    assert_equal(expandvars("漢字 $TEST_VAR🔥!"), "漢字 World🔥!")
+    assert_equal(expandvars("$TEST_VAR/path/to/file"), "World/path/to/file")
+
+    _ = os.setenv("UNICODE_TEST_VAR", "漢字🔥")
+    assert_equal(expandvars("Hello $UNICODE_TEST_VAR!"), "Hello 漢字🔥!")
+    assert_equal(expandvars("漢字 $UNICODE_TEST_VAR🔥!"), "漢字 漢字🔥🔥!")
+    assert_equal(
+        expandvars("$UNICODE_TEST_VAR/path/to/file"), "漢字🔥/path/to/file"
+    )
+
+
+def test_braced_expansion():
+    _ = os.setenv("BRACE_VAR", "World")
+    assert_equal(expandvars("Hello ${BRACE_VAR}!"), "Hello World!")
+    assert_equal(expandvars("漢字 ${BRACE_VAR}🔥!"), "漢字 World🔥!")
+    assert_equal(expandvars("${BRACE_VAR}/path/to/file"), "World/path/to/file")
+
+    _ = os.setenv("UNICODE_BRACE_VAR", "漢字🔥")
+    assert_equal(expandvars("Hello ${UNICODE_BRACE_VAR}!"), "Hello 漢字🔥!")
+    assert_equal(expandvars("漢字 ${UNICODE_BRACE_VAR}🔥!"), "漢字 漢字🔥🔥!")
+    assert_equal(
+        expandvars("${UNICODE_BRACE_VAR}/path/to/file"), "漢字🔥/path/to/file"
+    )
+
+
+def test_unset_expansion():
+    # Unset variables should be left as is, not expanded to an empty string.
+    assert_equal(
+        expandvars("Hello $NONEXISTENT_VAR!"), "Hello $NONEXISTENT_VAR!"
+    )
+    assert_equal(
+        expandvars("漢字 ${NONEXISTENT_VAR}🔥!"), "漢字 ${NONEXISTENT_VAR}🔥!"
+    )
+
+
+def test_dollar_sign():
+    # A lone dollar sign should not be expanded.
+    assert_equal(expandvars("A lone $ sign"), "A lone $ sign")
+
+    # Special shell variables should not be expanded.
+    assert_equal(
+        expandvars("$@ $* $1 $2 $3 $NONEXISTENT_VAR."),
+        "$@ $* $1 $2 $3 $NONEXISTENT_VAR.",
+    )
+
+
+def test_invalid_syntax():
+    # Invalid syntax should be written as is.
+    assert_equal(expandvars("${}"), "${}")
+    assert_equal(expandvars("${"), "${")
+
+
+def main():
+    test_expansion()
+    test_braced_expansion()
+    test_unset_expansion()
+    test_dollar_sign()
+    test_invalid_syntax()