Skip to content

Commit

Permalink
Merge pull request #17683 from yoff/python/flip-default-for-stdlib-extraction
Browse files Browse the repository at this point in the history

Python: Flip default for stdlib extraction
  • Loading branch information
yoff authored Oct 11, 2024
2 parents 5b57826 + 5650694 commit 8f68115
Show file tree
Hide file tree
Showing 37 changed files with 48 additions and 93 deletions.
4 changes: 2 additions & 2 deletions python/extractor/cli-integration-test/extract-stdlib/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ rm -rf dbs

mkdir dbs

CODEQL_EXTRACTOR_PYTHON_DONT_EXTRACT_STDLIB=True $CODEQL database create dbs/without-stdlib --language python --source-root repo_dir/
$CODEQL database create dbs/without-stdlib --language python --source-root repo_dir/
$CODEQL query run --database dbs/without-stdlib query.ql > query.without-stdlib.actual
diff query.without-stdlib.expected query.without-stdlib.actual

LGTM_INDEX_EXCLUDE="/usr/lib/**" $CODEQL database create dbs/with-stdlib --language python --source-root repo_dir/
LGTM_INDEX_EXCLUDE="/usr/lib/**" CODEQL_EXTRACTOR_PYTHON_EXTRACT_STDLIB=True $CODEQL database create dbs/with-stdlib --language python --source-root repo_dir/
$CODEQL query run --database dbs/with-stdlib query.ql > query.with-stdlib.actual
diff query.with-stdlib.expected query.with-stdlib.actual
16 changes: 14 additions & 2 deletions python/extractor/semmle/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,10 @@ def make_parser():
config_options.add_option("--colorize", dest="colorize", default=False, action="store_true",
help = """Colorize the logging output.""")

config_options.add_option("--dont-extract-stdlib", dest="extract_stdlib", default=True, action="store_false",
help="Do not extract the standard library.")
config_options.add_option("--dont-extract-stdlib", dest="extract_stdlib", action="store_false",
help="This flag is deprecated; not extracting the standard library is now the default.")
config_options.add_option("--extract-stdlib", dest="extract_stdlib", default=False, action="store_true",
help="Extract the standard library.")

parser.add_option_group(config_options)

Expand Down Expand Up @@ -226,8 +228,18 @@ def parse(command_line):

if 'CODEQL_EXTRACTOR_PYTHON_DONT_EXTRACT_STDLIB' in os.environ:
options.extract_stdlib = False
print ("WARNING: CODEQL_EXTRACTOR_PYTHON_DONT_EXTRACT_STDLIB is deprecated; the default is now to not extract the standard library.")

if 'CODEQL_EXTRACTOR_PYTHON_EXTRACT_STDLIB' in os.environ:
options.extract_stdlib = True

options.prune = True

if options.extract_stdlib:
print ("WARNING: The analysis will extract the standard library. This behavior is deprecated and will be removed in a future release. We expect it to be gone in CLI version 2.20.0.")
else:
print ("INFO: The Python extractor has recently stopped extracting the standard library by default. If you encounter problems, please let us know by submitting an issue to https://github.com/github/codeql. It is possible to re-enable extraction of the standard library by setting the environment variable CODEQL_EXTRACTOR_PYTHON_EXTRACT_STDLIB.")

return options, args

def split_and_flatten(options_list, div):
Expand Down
2 changes: 1 addition & 1 deletion python/extractor/semmle/populator.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def main(sys_path = sys.path[:]):
update_analysis_version(last_version)

found_py2 = False
if get_analysis_major_version() == 2:
if get_analysis_major_version() == 2 and options.extract_stdlib:
# Setup `sys_path` to use the Python 2 standard library
sys_path, found_py2 = get_py2_sys_path(logger, sys_path)

Expand Down
2 changes: 1 addition & 1 deletion python/extractor/semmle/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#Semantic version of extractor.
#Update this if any changes are made
VERSION = "6.1.2"
VERSION = "7.0.0"

PY_EXTENSIONS = ".py", ".pyw"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
| mapping | builtin-class collections.defaultdict |
| mapping | builtin-class dict |
| mapping | class MyDictSubclass |
| mapping | class MyMappingABC |
| mapping | class OrderedDict |
| neither sequence nor mapping | builtin-class set |
| neither sequence nor mapping | class MyMappingABC |
| neither sequence nor mapping | class MySequenceABC |
| sequence | builtin-class list |
| sequence | builtin-class str |
| sequence | builtin-class tuple |
| sequence | builtin-class unicode |
| sequence | class MySequenceABC |
| sequence | class MySequenceImpl |
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,10 @@
| test.py | 15 | ControlFlowNode for moduleX | Module package.moduleX | Entry node for Module package.moduleX |
| test.py | 16 | ControlFlowNode for Attribute | class Y | ControlFlowNode for ClassExpr |
| test.py | 16 | ControlFlowNode for moduleX | Module package.moduleX | Entry node for Module package.moduleX |
| test.py | 19 | ControlFlowNode for ImportExpr | Module tty | ControlFlowNode for ImportExpr |
| test.py | 19 | ControlFlowNode for tty | Module tty | ControlFlowNode for ImportExpr |
| test.py | 22 | ControlFlowNode for Attribute | Builtin-function exc_info | ControlFlowNode for from sys import * |
| test.py | 22 | ControlFlowNode for x | Module package.x | Entry node for Module package.x |
| test.py | 24 | ControlFlowNode for IntegerLiteral | int 0 | ControlFlowNode for IntegerLiteral |
| test.py | 24 | ControlFlowNode for argv | int 0 | ControlFlowNode for IntegerLiteral |
| test.py | 27 | ControlFlowNode for ImportExpr | Module sys | ControlFlowNode for ImportExpr |
| test.py | 31 | ControlFlowNode for argv | list object | ControlFlowNode for from sys import * |
| test.py | 33 | ControlFlowNode for ImportExpr | Module socket | ControlFlowNode for ImportExpr |
| test.py | 34 | ControlFlowNode for timeout | builtin-class socket.timeout | ControlFlowNode for from _socket import * |
| x.py | 2 | ControlFlowNode for ImportExpr | Module sys | ControlFlowNode for ImportExpr |
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,10 @@
| test.py | 15 | ControlFlowNode for moduleX | Module package.moduleX | builtin-class module | Entry node for Module package.moduleX |
| test.py | 16 | ControlFlowNode for Attribute | class Y | builtin-class type | ControlFlowNode for ClassExpr |
| test.py | 16 | ControlFlowNode for moduleX | Module package.moduleX | builtin-class module | Entry node for Module package.moduleX |
| test.py | 19 | ControlFlowNode for ImportExpr | Module tty | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 19 | ControlFlowNode for tty | Module tty | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 22 | ControlFlowNode for Attribute | Builtin-function exc_info | builtin-class builtin_function_or_method | ControlFlowNode for from sys import * |
| test.py | 22 | ControlFlowNode for x | Module package.x | builtin-class module | Entry node for Module package.x |
| test.py | 24 | ControlFlowNode for IntegerLiteral | int 0 | builtin-class int | ControlFlowNode for IntegerLiteral |
| test.py | 24 | ControlFlowNode for argv | int 0 | builtin-class int | ControlFlowNode for IntegerLiteral |
| test.py | 27 | ControlFlowNode for ImportExpr | Module sys | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 31 | ControlFlowNode for argv | list object | builtin-class list | ControlFlowNode for from sys import * |
| test.py | 33 | ControlFlowNode for ImportExpr | Module socket | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 34 | ControlFlowNode for timeout | builtin-class socket.timeout | builtin-class type | ControlFlowNode for from _socket import * |
| x.py | 2 | ControlFlowNode for ImportExpr | Module sys | builtin-class module | ControlFlowNode for ImportExpr |
4 changes: 2 additions & 2 deletions python/ql/test/2/library-tests/six/pointsto.expected
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
| six | Package six |
| six.moves | Package six.moves |
| six.moves.http_client | Module httplib |
| six.moves.http_client.HTTPConnection | class HTTPConnection |
| six.moves.http_client | Missing module httplib |
| six.moves.http_client.HTTPConnection | Missing module attribute httplib.HTTPConnection |
| six.moves.range | builtin-class xrange |
| six.moves.urllib | Package six.moves.urllib |
| six.moves.urllib.parse | Module six.moves.urllib_parse |
Expand Down
2 changes: 1 addition & 1 deletion python/ql/test/2/query-tests/Summary/LinesOfCode.expected
Original file line number Diff line number Diff line change
@@ -1 +1 @@
| 38 |
| 11 |
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
| mapping | builtin-class collections.OrderedDict |
| mapping | builtin-class collections.defaultdict |
| mapping | builtin-class dict |
| mapping | class MyDictSubclass |
| mapping | class MyMappingABC |
| mapping | class OrderedDict |
| neither sequence nor mapping | builtin-class set |
| sequence | builtin-class bytes |
| sequence | builtin-class list |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,10 @@
| test.py | 15 | ControlFlowNode for moduleX | Module package.moduleX | Entry node for Module package.moduleX |
| test.py | 16 | ControlFlowNode for Attribute | class Y | ControlFlowNode for ClassExpr |
| test.py | 16 | ControlFlowNode for moduleX | Module package.moduleX | Entry node for Module package.moduleX |
| test.py | 19 | ControlFlowNode for ImportExpr | Module tty | ControlFlowNode for ImportExpr |
| test.py | 19 | ControlFlowNode for tty | Module tty | ControlFlowNode for ImportExpr |
| test.py | 22 | ControlFlowNode for Attribute | Builtin-function exc_info | ControlFlowNode for from sys import * |
| test.py | 22 | ControlFlowNode for x | Module package.x | Entry node for Module package.x |
| test.py | 24 | ControlFlowNode for IntegerLiteral | int 0 | ControlFlowNode for IntegerLiteral |
| test.py | 24 | ControlFlowNode for argv | int 0 | ControlFlowNode for IntegerLiteral |
| test.py | 27 | ControlFlowNode for ImportExpr | Module sys | ControlFlowNode for ImportExpr |
| test.py | 31 | ControlFlowNode for argv | list object | ControlFlowNode for from sys import * |
| test.py | 33 | ControlFlowNode for ImportExpr | Module socket | ControlFlowNode for ImportExpr |
| test.py | 34 | ControlFlowNode for timeout | builtin-class TimeoutError | ControlFlowNode for from _socket import * |
| x.py | 2 | ControlFlowNode for ImportExpr | Module sys | ControlFlowNode for ImportExpr |
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,10 @@
| test.py | 15 | ControlFlowNode for moduleX | Module package.moduleX | builtin-class module | Entry node for Module package.moduleX |
| test.py | 16 | ControlFlowNode for Attribute | class Y | builtin-class type | ControlFlowNode for ClassExpr |
| test.py | 16 | ControlFlowNode for moduleX | Module package.moduleX | builtin-class module | Entry node for Module package.moduleX |
| test.py | 19 | ControlFlowNode for ImportExpr | Module tty | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 19 | ControlFlowNode for tty | Module tty | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 22 | ControlFlowNode for Attribute | Builtin-function exc_info | builtin-class builtin_function_or_method | ControlFlowNode for from sys import * |
| test.py | 22 | ControlFlowNode for x | Module package.x | builtin-class module | Entry node for Module package.x |
| test.py | 24 | ControlFlowNode for IntegerLiteral | int 0 | builtin-class int | ControlFlowNode for IntegerLiteral |
| test.py | 24 | ControlFlowNode for argv | int 0 | builtin-class int | ControlFlowNode for IntegerLiteral |
| test.py | 27 | ControlFlowNode for ImportExpr | Module sys | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 31 | ControlFlowNode for argv | list object | builtin-class list | ControlFlowNode for from sys import * |
| test.py | 33 | ControlFlowNode for ImportExpr | Module socket | builtin-class module | ControlFlowNode for ImportExpr |
| test.py | 34 | ControlFlowNode for timeout | builtin-class TimeoutError | builtin-class type | ControlFlowNode for from _socket import * |
| x.py | 2 | ControlFlowNode for ImportExpr | Module sys | builtin-class module | ControlFlowNode for ImportExpr |
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
| mwe_failure.py:7:1:7:23 | class MyTest | <MISSING BASE TYPE> |
| mwe_failure_2.py:7:1:7:23 | class MyTest | <MISSING BASE TYPE> |
| mwe_failure.py:7:1:7:23 | class MyTest | class TestCase |
| mwe_failure_2.py:7:1:7:23 | class MyTest | class TestCase |
| mwe_success.py:7:1:7:23 | class MyTest | class TestCase |
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
| Module package.assistant | e | Wrong() |
| Module package.assistant | f | int 1 |
| Module package.helper | __name__ | str u'package.helper' |
| Module package.helper | absolute_import | _Feature() |
| Module package.helper | assistant | Module package.assistant |
| Module package.helper | d | int 4 |
| Module package.helper | e | int 5 |
Expand Down
4 changes: 2 additions & 2 deletions python/ql/test/3/library-tests/six/pointsto.expected
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
| six | Package six |
| six.moves | Package six.moves |
| six.moves.http_client | Module http.client |
| six.moves.http_client.HTTPConnection | class HTTPConnection |
| six.moves.http_client | Missing module http.client |
| six.moves.http_client.HTTPConnection | Missing module attribute http.client.HTTPConnection |
| six.moves.range | builtin-class range |
| six.moves.urllib | Package six.moves.urllib |
| six.moves.urllib.parse | Module six.moves.urllib_parse |
Expand Down
2 changes: 1 addition & 1 deletion python/ql/test/3/query-tests/Summary/LinesOfCode.expected
Original file line number Diff line number Diff line change
@@ -1 +1 @@
| 51 |
| 11 |
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
| UndefinedExport.py:3:18:3:20 | StringLiteral | The name 'y' is exported by __all__ but is not defined. |
| UndefinedExport.py:3:23:3:25 | StringLiteral | The name 'z' is exported by __all__ but is not defined. |
| UndefinedExport.py:3:28:3:35 | StringLiteral | The name 'module' is exported by __all__ but is not defined. |
| enum_convert.py:8:13:8:19 | StringLiteral | The name 'Maybe' is exported by __all__ but is not defined. |
| enum_convert.py:8:22:8:32 | StringLiteral | The name 'Maybe_not' is exported by __all__ but is not defined. |
| package/__init__.py:1:23:1:34 | StringLiteral | The name 'not_exists' is exported by __all__ but is not defined. |
2 changes: 1 addition & 1 deletion python/ql/test/extractor-tests/flags/Flags.expected
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
| options.all | False |
| options.colorize | True |
| options.context_cost | 11 |
| options.extract_stdlib | True |
| options.extract_stdlib | False |
| options.guess | False |
| options.help | False |
| options.ignore_missing_modules | False |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
| 1 | ControlFlowNode for functools | Module functools | test.py:1 |
| 3 | ControlFlowNode for annotate | Function annotate | test.py:3 |
| 4 | ControlFlowNode for inner | Function inner | test.py:4 |
| 5 | ControlFlowNode for func | Function func1 | test.py:23 |
Expand All @@ -11,7 +10,6 @@
| 13 | ControlFlowNode for wrapper | Function wrapper | test.py:10 |
| 15 | ControlFlowNode for wraps2 | Function wraps2 | test.py:15 |
| 16 | ControlFlowNode for func | Function func3 | test.py:31 |
| 16 | ControlFlowNode for functools | Module functools | test.py:1 |
| 17 | ControlFlowNode for args | args | test.py:17 |
| 17 | ControlFlowNode for wrapper | Attribute()() | test.py:16 |
| 18 | ControlFlowNode for args | args | test.py:17 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
| test.py:11:21:11:24 | ControlFlowNode for args | runtime | instance of tuple |
| test.py:13:12:13:18 | ControlFlowNode for wrapper | runtime | Function wraps1.wrapper |
| test.py:13:12:13:18 | ControlFlowNode for wrapper | test.py:26 from import | Function wraps1.wrapper |
| test.py:16:6:16:14 | ControlFlowNode for functools | runtime | Module functools |
| test.py:16:6:16:14 | ControlFlowNode for functools | test.py:30 from import | Module functools |
| test.py:16:6:16:14 | ControlFlowNode for functools | runtime | Missing module functools |
| test.py:16:6:16:14 | ControlFlowNode for functools | test.py:30 from import | Missing module functools |
| test.py:16:22:16:25 | ControlFlowNode for func | runtime | Unknown value |
| test.py:16:22:16:25 | ControlFlowNode for func | test.py:30 from import | Function func3 |
| test.py:18:21:18:24 | ControlFlowNode for args | runtime | instance of tuple |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@
| Module pointsto_test | 69 | ControlFlowNode for X | class X |
| Module pointsto_test | 70 | ControlFlowNode for Attribute | deco() |
| Module pointsto_test | 70 | ControlFlowNode for X | class X |
| Module pointsto_test | 72 | ControlFlowNode for ImportExpr | Module abc |
| Module pointsto_test | 72 | ControlFlowNode for ImportMember | Function abstractmethod |
| Module pointsto_test | 72 | ControlFlowNode for abstractmethod | Function abstractmethod |
| Module pointsto_test | 73 | ControlFlowNode for abstractmethod | Function abstractmethod |
| Module pointsto_test | 75 | ControlFlowNode for C | class C |
| Module pointsto_test | 75 | ControlFlowNode for C() | C() |
| Module pointsto_test | 75 | ControlFlowNode for type | builtin-class type |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,6 @@
| 69 | ControlFlowNode for X | class X |
| 70 | ControlFlowNode for Attribute | deco() |
| 70 | ControlFlowNode for X | class X |
| 72 | ControlFlowNode for ImportExpr | Module abc |
| 72 | ControlFlowNode for ImportMember | Function abstractmethod |
| 72 | ControlFlowNode for abstractmethod | Function abstractmethod |
| 73 | ControlFlowNode for abstractmethod | Function abstractmethod |
| 75 | ControlFlowNode for C | class C |
| 75 | ControlFlowNode for C() | C() |
| 75 | ControlFlowNode for type | builtin-class type |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,6 @@
| 69 | ControlFlowNode for Attribute | Attribute | builtin-class method |
| 69 | ControlFlowNode for X | class X | builtin-class type |
| 70 | ControlFlowNode for X | class X | builtin-class type |
| 72 | ControlFlowNode for ImportExpr | Module abc | builtin-class module |
| 72 | ControlFlowNode for ImportMember | Function abstractmethod | builtin-class function |
| 72 | ControlFlowNode for abstractmethod | Function abstractmethod | builtin-class function |
| 73 | ControlFlowNode for abstractmethod | Function abstractmethod | builtin-class function |
| 75 | ControlFlowNode for C | class C | builtin-class type |
| 75 | ControlFlowNode for C() | C() | class C |
| 75 | ControlFlowNode for type | builtin-class type | builtin-class type |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,3 @@
| type_test.py | 55 | ControlFlowNode for arg | class E | 29 |
| type_test.py | 67 | ControlFlowNode for x | float 1.0 | 62 |
| type_test.py | 67 | ControlFlowNode for x | int 0 | 62 |
| type_test.py | 77 | ControlFlowNode for IntegerLiteral | int 0 | 77 |
| type_test.py | 83 | ControlFlowNode for IntegerLiteral | int 0 | 83 |
| type_test.py | 89 | ControlFlowNode for IntegerLiteral | int 0 | 89 |
| type_test.py | 95 | ControlFlowNode for IntegerLiteral | int 0 | 95 |
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,3 @@
| type_test.py | 55 | ControlFlowNode for arg | class E | builtin-class type | 29 |
| type_test.py | 67 | ControlFlowNode for x | float 1.0 | builtin-class float | 62 |
| type_test.py | 67 | ControlFlowNode for x | int 0 | builtin-class int | 62 |
| type_test.py | 77 | ControlFlowNode for IntegerLiteral | int 0 | builtin-class int | 77 |
| type_test.py | 83 | ControlFlowNode for IntegerLiteral | int 0 | builtin-class int | 83 |
| type_test.py | 89 | ControlFlowNode for IntegerLiteral | int 0 | builtin-class int | 89 |
| type_test.py | 95 | ControlFlowNode for IntegerLiteral | int 0 | builtin-class int | 95 |
3 changes: 0 additions & 3 deletions python/ql/test/library-tests/PointsTo/new/NameSpace.expected
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,12 @@
| h_classes.py:23 | Class Base | __init__ | Function __init__ |
| h_classes.py:48 | Class D | m | Function f |
| h_classes.py:48 | Class D | n | Function n |
| i_imports.py:0 | Module code.i_imports | BytesIO | builtin-class _io.BytesIO |
| i_imports.py:0 | Module code.i_imports | StringIO | builtin-class _io.StringIO |
| i_imports.py:0 | Module code.i_imports | _io | Module _io |
| i_imports.py:0 | Module code.i_imports | a | int 1 |
| i_imports.py:0 | Module code.i_imports | argv | list object |
| i_imports.py:0 | Module code.i_imports | b | int 2 |
| i_imports.py:0 | Module code.i_imports | c | int 3 |
| i_imports.py:0 | Module code.i_imports | code | Module code |
| i_imports.py:0 | Module code.i_imports | io | Module io |
| i_imports.py:0 | Module code.i_imports | module1 | Module code.test_package.module1 |
| i_imports.py:0 | Module code.i_imports | module2 | Module code.test_package.module2 |
| i_imports.py:0 | Module code.i_imports | p | int 1 |
Expand Down
Loading

0 comments on commit 8f68115

Please sign in to comment.