Skip to content

Commit

Permalink
Merge pull request #14114 from yoff/python/allow-namespace-packages
Browse files Browse the repository at this point in the history
Python: Allow namespace packages
  • Loading branch information
yoff authored Oct 26, 2023
2 parents abef848 + dcc7785 commit 867a390
Show file tree
Hide file tree
Showing 17 changed files with 146 additions and 40 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Namespace packages in the form of regular packages with missing `__init__.py`-files are now allowed. This enables the analysis to resolve modules and functions inside such packages.
124 changes: 96 additions & 28 deletions python/ql/lib/semmle/python/Module.qll
Original file line number Diff line number Diff line change
Expand Up @@ -179,21 +179,6 @@ private predicate legalDottedName(string name) {
/**
 * Holds if `name` matches the pattern for a short (undotted) name:
 * a Unicode letter or an underscore, followed by any number of
 * Unicode letters, digits, or underscores.
 */
bindingset[name]
private predicate legalShortName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") }

/**
 * Holds if `f` is potentially a source package, that is, `f` is a
 * potential package (see `isPotentialPackage`) and has a non-empty
 * relative path (presumably meaning it lies within the source archive).
 */
private predicate isPotentialSourcePackage(Folder f) {
  isPotentialPackage(f) and
  f.getRelativePath() != ""
}

/**
 * Holds if `f` may be a package: either the major version is 2 and the
 * `options.respect_init` flag is `False` (in which case any folder qualifies),
 * or `f` contains an `__init__.py` file.
 */
private predicate isPotentialPackage(Folder f) {
  major_version() = 2 and
  py_flags_versioned("options.respect_init", "False", _) and
  exists(f)
  or
  exists(f.getFile("__init__.py"))
}

private string moduleNameFromBase(Container file) {
// We used to also require `isPotentialPackage(f)` to hold in this case,
// but we saw modules not getting resolved because their folder did not
Expand Down Expand Up @@ -236,31 +221,114 @@ private predicate transitively_imported_from_entry_point(File file) {
)
}

/**
 * Holds if the folder `f` is a regular Python package named `name`:
 * `f` contains an `__init__.py` file, and `name` is the stem of `f`
 * and a legal short name.
 */
private predicate isRegularPackage(Folder f, string name) {
  exists(f.getFile("__init__.py")) and
  f.getStem() = name and
  legalShortName(name)
}

/**
 * Gets the first component (everything before the first `.`) of the name of
 * an import expression located in a file whose parent is the container `c`.
 * Only non-empty, legal short names are returned.
 */
private string moduleImportedInPackage(Container c) {
  exists(ImportExpr imp, string importedName |
    // the import has to occur in a file directly inside `c`
    imp.getLocation().getFile().getParent() = c and
    importedName = imp.getName() and
    // drop the first `.` and everything following it
    result = importedName.regexpReplaceAll("\\..*", "")
  ) and
  result != "" and
  legalShortName(result)
}

/**
 * Holds if `file` could be resolved to a module named `name`: `name` is the
 * stem of `file` and a legal short name, `file` has one of the extensions
 * `py`, `pyc`, `so` or `pyd`, and `name` is imported by some file located in
 * the same folder as `file`.
 */
private predicate isPotentialModuleFile(File file, string name) {
  file.getExtension() = ["py", "pyc", "so", "pyd"] and
  file.getStem() = name and
  // some file in the enclosing folder has to import it
  moduleImportedInPackage(file.getParent()) = name and
  legalShortName(name)
}

/**
 * Holds if the folder `f` is treated as a namespace package named `name`.
 *
 * This requires that `name` is the stem of `f` and a legal short name,
 * that `f` is not a regular package, that `name` is imported by a file
 * located directly in `f` or in the parent of `f`, and that the final
 * exclusion check below finds no competing regular package or module.
 *
 * See https://peps.python.org/pep-0420/#specification
 * for details on namespace packages.
 */
private predicate isNameSpacePackage(Folder f, string name) {
legalShortName(name) and
name = f.getStem() and
not isRegularPackage(f, name) and
// it has to be imported in a file
// either in this folder or next to this folder
name = moduleImportedInPackage([f, f.getParent()]) and
// Exclusion check: `sibling` ranges over every folder that has the same
// parent as `f` (this includes `f` itself). The check fails if any such
// folder has a `getFolder(name)` that is a regular package, or a
// `getAFile()` that is a potential module file named `name`.
// NOTE(review): the original comment read "no sibling regular package and
// no sibling module", but the code inspects what `sibling` contains
// (presumably its children), not entries next to `f` -- confirm intent.
not exists(Folder sibling | sibling.getParent() = f.getParent() |
isRegularPackage(sibling.getFolder(name), name)
or
isPotentialModuleFile(sibling.getAFile(), name)
)
}

/**
 * Holds if the folder `f` is a package named `name`, where a package is
 * either a namespace package or a regular package.
 */
private predicate isPackage(Folder f, string name) {
  isNameSpacePackage(f, name)
  or
  isRegularPackage(f, name)
}

/**
 * Holds if `file` is a module named `name`: it is a potential module file
 * with that name, and its parent folder is not a package (under any name).
 */
private predicate isModuleFile(File file, string name) {
  not exists(string packageName | isPackage(file.getParent(), packageName)) and
  isPotentialModuleFile(file, name)
}

/**
 * Holds if the folder `f` is a package named `name`
 * and does not reside directly inside another package,
 * that is, the parent of `f` is not a package (under any name).
 */
private predicate isOutermostPackage(Folder f, string name) {
  not exists(string parentName | isPackage(f.getParent(), parentName)) and
  isPackage(f, name)
}

/** Gets the name of the module that `c` resolves to, if any. */
cached
string moduleNameFromFile(Container file) {
string moduleNameFromFile(Container c) {
// package
isOutermostPackage(c, result)
or
// module
isModuleFile(c, result)
or
Stages::AST::ref() and
exists(string basename |
basename = moduleNameFromBase(file) and
basename = moduleNameFromBase(c) and
legalShortName(basename)
|
result = moduleNameFromFile(file.getParent()) + "." + basename
// recursive case
result = moduleNameFromFile(c.getParent()) + "." + basename
or
// If `c` is a transitive import of a file that's executed directly, we allow references
// to it by its `basename`.
transitively_imported_from_entry_point(file) and
transitively_imported_from_entry_point(c) and
result = basename
)
or
isPotentialSourcePackage(file) and
result = file.getStem() and
(
not isPotentialSourcePackage(file.getParent()) or
not legalShortName(file.getParent().getBaseName())
)
or
result = file.getStem() and file.getParent() = file.getImportRoot()
//
// standard library
result = c.getStem() and c.getParent() = c.getImportRoot()
or
result = file.getStem() and isStubRoot(file.getParent())
result = c.getStem() and isStubRoot(c.getParent())
}

private predicate isStubRoot(Folder f) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Scope getScope() { result = mod }

override string toString() {
result = "ModuleVariableNode in " + mod.toString() + " for " + var.getId()
result = "ModuleVariableNode in " + concat( | | mod.toString(), ",") + " for " + var.getId()
}

/** Gets the module in which this variable appears. */
Expand Down
1 change: 0 additions & 1 deletion python/ql/test/2/library-tests/six/pointsto.expected
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@
| six.moves.range | builtin-class xrange |
| six.moves.urllib | Package six.moves.urllib |
| six.moves.urllib.parse | Module six.moves.urllib_parse |
| six.moves.urllib.parse.urlsplit | Function urlsplit |
| six.moves.zip | Builtin-function zip |
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
| module | hash_bang/module.py:0:0:0:0 | Module module |
| module | name_main/module.py:0:0:0:0 | Module module |
| namespace_package | hash_bang/namespace_package:0:0:0:0 | Package namespace_package |
| namespace_package | name_main/namespace_package:0:0:0:0 | Package namespace_package |
| namespace_package | no_py_extension/namespace_package:0:0:0:0 | Package namespace_package |
| namespace_package.namespace_package_main | hash_bang/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
| namespace_package.namespace_package_main | name_main/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
| namespace_package.namespace_package_main | no_py_extension/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
| namespace_package.namespace_package_module | hash_bang/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
| namespace_package.namespace_package_module | name_main/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
| namespace_package.namespace_package_module | no_py_extension/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
| package | hash_bang/package:0:0:0:0 | Package package |
| package | name_main/package:0:0:0:0 | Package package |
| package | no_py_extension/package:0:0:0:0 | Package package |
Expand Down
1 change: 0 additions & 1 deletion python/ql/test/3/library-tests/six/pointsto.expected
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@
| six.moves.range | builtin-class range |
| six.moves.urllib | Package six.moves.urllib |
| six.moves.urllib.parse | Module six.moves.urllib_parse |
| six.moves.urllib.parse.urlsplit | Function urlsplit |
| six.moves.zip | builtin-class zip |
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@

from foo.bar.a import afunc
from foo_explicit.bar.a import explicit_afunc
from not_root.baz.foo import foo_func
from not_root.baz.bar.a import afunc as afunc2

afunc() # $ pt,tt="foo/bar/a.py:afunc"

explicit_afunc() # $ pt,tt="foo_explicit/bar/a.py:explicit_afunc"

foo_func() # $ pt,tt="not_root/baz/foo.py:foo_func"

afunc2() # $ pt,tt="not_root/baz/bar/a.py:afunc"
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
def afunc():
print("afunc called")
return 1
print("afunc called")
return 1

from foo.foo import foo_func
foo_func() # $ pt,tt="foo/foo.py:foo_func"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def foo_func():
print("foo_func called")
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
def explicit_afunc():
print("explicit_afunc called")
return 1
print("explicit_afunc called")
return 1

from foo_explicit.foo_explicit import foo_explicit_func
foo_explicit_func() # $ pt,tt="foo_explicit/foo_explicit.py:foo_explicit_func"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def foo_explicit_func():
print("foo_explicit_func called")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def afunc():
print("afunc called")
return 1

from not_root.baz.foo import foo_func
foo_func() # $ pt,tt="not_root/baz/foo.py:foo_func"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def foo_func():
print("foo_func called")
Original file line number Diff line number Diff line change
@@ -1 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3
semmle-extractor-options: --max-import-depth=3 --lang=3
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
| Local module | code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd | referenced in external file called | pdb.py |
| Local module | code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd | referenced in local file called | test_ok.py |
| Local module | code-invalid-package-name/unique_name.py:0:0:0:0 | Module unique_name | referenced in local file called | unique_name_use.py |
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
| code-invalid-package-name/cmd.py:0:0:0:0 | Script cmd.py |
| code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd |
| code-invalid-package-name/test_fail.py:0:0:0:0 | Script test_fail.py |
| code-invalid-package-name/test_ok.py:0:0:0:0 | Script test_ok.py |
| code-invalid-package-name/unique_name.py:0:0:0:0 | Script unique_name.py |
| code-invalid-package-name/unique_name.py:0:0:0:0 | Module unique_name |
| code-invalid-package-name/unique_name_use.py:0:0:0:0 | Script unique_name_use.py |
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
| Module 'cmd' (external, in stdlib, not missing) referenced in local file | code-invalid-package-name/test_ok.py:1 |
| Module 'cmd' (local, not in stdlib, not missing) referenced in local file | code-invalid-package-name/test_ok.py:1 |
| Module 'pdb' (external, in stdlib, not missing) referenced in local file | code-invalid-package-name/test_fail.py:3 |
| Module 'unique_name' (external, not in stdlib, missing) referenced in local file | code-invalid-package-name/unique_name_use.py:1 |
| Module 'unique_name' (local, not in stdlib, not missing) referenced in local file | code-invalid-package-name/unique_name_use.py:1 |

0 comments on commit 867a390

Please sign in to comment.