Skip to content

Commit

Permalink
Merge pull request #16135 from sylwia-budzynska/gradio-model
Browse files Browse the repository at this point in the history
Python: Add Gradio models
  • Loading branch information
yoff authored May 16, 2024
2 parents 4617c05 + 72493a6 commit 5076b1a
Show file tree
Hide file tree
Showing 10 changed files with 233 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/codeql/reusables/supported-frameworks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ and the CodeQL library pack ``codeql/python-all`` (`changelog <https://github.co
Flask-Admin, Web framework
Tornado, Web framework
Twisted, Web framework
Gradio, Web framework
starlette, Asynchronous Server Gateway Interface (ASGI)
ldap3, Lightweight Directory Access Protocol (LDAP)
python-ldap, Lightweight Directory Access Protocol (LDAP)
Expand Down
1 change: 1 addition & 0 deletions python/ql/lib/semmle/python/Frameworks.qll
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ private import semmle.python.frameworks.FastApi
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskAdmin
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Gradio
private import semmle.python.frameworks.Httpx
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
Expand Down
123 changes: 123 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Gradio.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/**
* Provides classes modeling security-relevant aspects of the `gradio` PyPI package.
* See https://pypi.org/project/gradio/.
*/

import python
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.TaintTracking
import semmle.python.ApiGraphs

/**
* Provides models for the `gradio` PyPI package.
* See https://pypi.org/project/gradio/.
*/
module Gradio {
/**
 * A call through which Gradio hands untrusted data to user code: either an
 * event listener invoked on a component instance, such as
 * `gr.Button(...).click(...)`, or an instantiation of `gradio.Interface` or
 * `gradio.ChatInterface`.
 */
private class GradioInput extends API::CallNode {
  GradioInput() {
    // `gr.Interface(...)` and `gr.ChatInterface(...)`
    this = API::moduleImport("gradio").getMember(["Interface", "ChatInterface"]).getACall()
    or
    // an event listener called on an instantiated component
    exists(API::Node component |
      component =
        API::moduleImport("gradio")
            .getMember([
                "Button", "Textbox", "UploadButton", "Slider", "JSON", "HTML", "Markdown", "File",
                "AnnotatedImage", "Audio", "BarPlot", "Chatbot", "Checkbox", "CheckboxGroup",
                "ClearButton", "Code", "ColorPicker", "Dataframe", "Dataset", "DownloadButton",
                "Dropdown", "DuplicateButton", "FileExplorer", "Gallery", "HighlightedText",
                "Image", "ImageEditor", "Label", "LinePlot", "LoginButton", "LogoutButton",
                "Model3D", "Number", "ParamViewer", "Plot", "Radio", "ScatterPlot", "SimpleImage",
                "State", "Video"
              ])
            .getReturn()
    |
      this =
        component
            .getMember([
                "change", "input", "click", "submit", "edit", "clear", "play", "pause", "stop",
                "end", "start_recording", "pause_recording", "stop_recording", "focus", "blur",
                "upload", "release", "select", "stream", "like", "load", "key_up"
              ])
            .getACall()
    )
  }
}

/**
 * An element of a list passed as the `inputs` argument of a Gradio event handler,
 * treated as a source of untrusted data.
 * Modeling the individual elements allows each list element to be tracked back
 * to its origin, e.g. `gr.Textbox(...)`.
 */
private class GradioInputList extends RemoteFlowSource::Range {
  GradioInputList() {
    exists(GradioInput call |
      // only applies when `inputs` (second positional argument, or keyword) is a list
      call.getParameter(1, "inputs").asSink().asCfgNode() instanceof ListNode and
      this = call.getParameter(1, "inputs").getASubscript().getAValueReachingSink()
    )
  }

  override string getSourceType() { result = "Gradio untrusted input" }
}

/**
 * A parameter of the function passed as `fn` to a Gradio event handler whose
 * `inputs` argument is not a list; such parameters are sources of untrusted data.
 */
private class GradioInputParameter extends RemoteFlowSource::Range {
  GradioInputParameter() {
    exists(GradioInput call |
      // handlers whose `inputs` (positional or keyword) is a list are excluded here
      not call.getParameter(1, "inputs").asSink().asCfgNode() instanceof ListNode and
      this = call.getParameter(0, "fn").getParameter(_).asSource()
    )
  }

  override string getSourceType() { result = "Gradio untrusted input" }
}

/**
 * A parameter of a function decorated with a Gradio event handler, e.g.
 * `@btn.click(inputs=..., outputs=...)`; such parameters are sources of untrusted data.
 */
private class GradioInputDecorator extends RemoteFlowSource::Range {
  GradioInputDecorator() {
    exists(GradioInput call, API::CallNode decoration |
      // the value returned by the event-handler call is itself called with the
      // decorated function as its first argument
      decoration = call.getReturn().getACall() and
      this = decoration.getParameter(0).getParameter(_).asSource()
    )
  }

  override string getSourceType() { result = "Gradio untrusted input" }
}

/**
 * Extra taint propagation for Gradio event handlers whose `inputs` argument is a
 * list: the `i`th element of the list flows to the `i`th parameter of the
 * function passed as `fn`.
 */
private class ListTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(GradioInput node, int i |
      // the cast to `ListNode` limits this step to `inputs` given as a list,
      // whether passed by keyword or as the second positional argument
      nodeFrom.asCfgNode() =
        node.getParameter(1, "inputs").asSink().asCfgNode().(ListNode).getElement(i) and
      nodeTo = node.getParameter(0, "fn").getParameter(i).asSource()
    )
  }
}
}
4 changes: 4 additions & 0 deletions python/ql/src/change-notes/2024-04-05-gradio-models.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added models of the `gradio` PyPI package.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
testFailures
failures
33 changes: 33 additions & 0 deletions python/ql/test/library-tests/frameworks/gradio/source_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import gradio as gr


with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    output = gr.Textbox(label="Output Box")
    # static block - not used as a source
    static_block = gr.HTML("""
<div style='height: 100px; width: 800px; background-color: pink;'></div>
""")
    greet_btn = gr.Button("Hello")

    # decorator: the event listener call is applied directly to the handler
    @greet_btn.click(inputs=name, outputs=output)
    def greet(name): # $ source=name
        return "Hello " + name + "!"

    # `click` event handler with keyword arguments
    def greet1(name): # $ source=name
        return "Hello " + name + "!"

    greet1_btn = gr.Button("Hello")
    greet1_btn.click(fn=greet1, inputs=name, outputs=output, api_name="greet")

    # `click` event handler with positional arguments
    def greet2(name): # $ source=name
        return "Hello " + name + "!"

    greet2_btn = gr.Button("Hello")
    # `fn`, `inputs` and `outputs` are passed positionally here so that this case
    # differs from the keyword-argument case above and covers positional matching
    greet2_btn.click(greet2, name, output, api_name="greet")


demo.launch()
20 changes: 20 additions & 0 deletions python/ql/test/library-tests/frameworks/gradio/source_test.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import python
import semmle.python.dataflow.new.RemoteFlowSources
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode

/**
 * Inline-expectations test reporting every `RemoteFlowSource` located in a file
 * that has a relative path, under the `source` tag.
 */
module SourceTest implements TestSig {
  /** Gets the only tag this test checks, `source`. */
  string getARelevantTag() { result = "source" }

  /**
   * Holds if a remote flow source is found at `location`; `element` is its string
   * representation and `value` is its pretty-printed form.
   */
  predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "source" and
    exists(location.getFile().getRelativePath()) and
    exists(RemoteFlowSource src |
      src.getLocation() = location and
      src.toString() = element and
      prettyNode(src) = value
    )
  }
}

import MakeTest<SourceTest>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
edges
| taint_step_test.py:5:5:5:8 | ControlFlowNode for path | taint_step_test.py:19:43:19:46 | ControlFlowNode for path | provenance | |
| taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | taint_step_test.py:5:5:5:8 | ControlFlowNode for path | provenance | |
| taint_step_test.py:6:5:6:8 | ControlFlowNode for file | taint_step_test.py:19:48:19:51 | ControlFlowNode for file | provenance | |
| taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | taint_step_test.py:6:5:6:8 | ControlFlowNode for file | provenance | |
| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | |
| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | AdditionalTaintStep |
| taint_step_test.py:11:24:11:27 | ControlFlowNode for file | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | AdditionalTaintStep |
| taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | provenance | |
| taint_step_test.py:19:43:19:46 | ControlFlowNode for path | taint_step_test.py:11:18:11:21 | ControlFlowNode for path | provenance | AdditionalTaintStep |
| taint_step_test.py:19:48:19:51 | ControlFlowNode for file | taint_step_test.py:11:24:11:27 | ControlFlowNode for file | provenance | AdditionalTaintStep |
nodes
| taint_step_test.py:5:5:5:8 | ControlFlowNode for path | semmle.label | ControlFlowNode for path |
| taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| taint_step_test.py:6:5:6:8 | ControlFlowNode for file | semmle.label | ControlFlowNode for file |
| taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | semmle.label | ControlFlowNode for path |
| taint_step_test.py:11:24:11:27 | ControlFlowNode for file | semmle.label | ControlFlowNode for file |
| taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath |
| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath |
| taint_step_test.py:19:43:19:46 | ControlFlowNode for path | semmle.label | ControlFlowNode for path |
| taint_step_test.py:19:48:19:51 | ControlFlowNode for file | semmle.label | ControlFlowNode for file |
subpaths
#select
| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | This path depends on a $@. | taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | user-provided value |
| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | This path depends on a $@. | taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | user-provided value |
22 changes: 22 additions & 0 deletions python/ql/test/library-tests/frameworks/gradio/taint_step_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import gradio as gr
import os

with gr.Blocks() as demo:
path = gr.Textbox(label="Path") # $ source=gr.Textbox(..)
file = gr.Textbox(label="File") # $ source=gr.Textbox(..)
output = gr.Textbox(label="Output Box")


# path injection sink: both handler parameters are joined into the path given to `open`
def fileread(path, file):
filepath = os.path.join(path, file)
with open(filepath, "r") as f:
return f.read()


# `click` event handler with `inputs` containing a list; the model maps element i of the list to parameter i of `fn`
greet1_btn = gr.Button("Path for the file to display")
greet1_btn.click(fn=fileread, inputs=[path,file], outputs=output, api_name="fileread")


demo.launch()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Security/CWE-022/PathInjection.ql

0 comments on commit 5076b1a

Please sign in to comment.