Skip to content

Commit

Permalink
Rust: parse formatting templates
Browse files Browse the repository at this point in the history
  • Loading branch information
aibaars committed Oct 16, 2024
1 parent 8f68115 commit 5883a85
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 4 deletions.
1 change: 0 additions & 1 deletion rust/ql/.generated.list

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion rust/ql/.gitattributes

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

216 changes: 214 additions & 2 deletions rust/ql/lib/codeql/rust/elements/internal/FormatArgsExprImpl.qll
Original file line number Diff line number Diff line change
@@ -1,22 +1,234 @@
// generated by codegen, remove this comment if you wish to edit this file
/**
* This module provides a hand-modifiable wrapper around the generated class `FormatArgsExpr`.
*
* INTERNAL: Do not use.
*/

private import codeql.rust.elements.internal.generated.FormatArgsExpr
private import codeql.rust.elements.LiteralExpr

/**
* INTERNAL: This module contains the customizable definition of `FormatArgsExpr` and should not
* be referenced directly.
*/
module Impl {
// private import codeql.rust.elements.internal.generated.LiteralExpr::LiteralExpr;
// the following QLdoc is generated: if you need to edit it, do it in the schema file
/**
* A FormatArgsExpr. For example:
* ```rust
* todo!()
* ```
*/
class FormatArgsExpr extends Generated::FormatArgsExpr {
  /**
   * Gets the `index`th format of this `FormatArgsExpr`'s formatting template (0-based).
   *
   * Only format elements are counted; literal text between them is skipped. For
   * example, in `"Hello {} and {}"` the first `{}` has index 0 and the second `{}`
   * has index 1.
   */
  Format getFormat(int index) {
    // The index stored in `TFormat` is the occurrence index among *all* template
    // parts (text runs as well as formats), so rank the formats by that index to
    // obtain a dense, 0-based numbering of the formats alone.
    result = rank[index + 1](Format f, int i | f = TFormat(this, _, i, _) | f order by i)
  }
}

/**
 * A format element in a formatting template. For example the `{}` in:
 * ```rust
 * println!("Hello {}", "world");
 * ```
 */
class Format extends TFormat {
  private FormatArgsExpr parent;
  private string text;
  private int offset;

  Format() { this = TFormat(parent, text, _, offset) }

  /** Gets a textual representation of this element, which is its matched template text. */
  string toString() { result = text }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(int templateStartColumn |
      parent
          .getTemplate()
          .getLocation()
          .hasLocationInfo(filepath, startline, templateStartColumn, _, _) and
      // `offset` is the position of this element within the template text, so the
      // element starts that many columns after the start of the template.
      startcolumn = templateStartColumn + offset and
      // The element is reported on the template's start line only.
      endline = startline and
      endcolumn = startcolumn + text.length() - 1
    )
  }

  /** Gets the parent of this `Format`. */
  FormatArgsExpr getParent() { result = parent }
}

/**
 * An argument in a format element in a formatting template. For example the `width`, `precision`, and `value` in:
 * ```rust
 * println!("Value {value:#width$.precision$}");
 * ```
 * or the `0`, `1` and `2` in:
 * ```rust
 * println!("Value {0:#1$.2$}", value, width, precision);
 * ```
 */
class FormatArgument extends TFormatArgument {
  private Format parent;
  string name;
  private int offset;

  FormatArgument() {
    // The argument kind is not needed by any member predicate, so it is matched
    // with `_` rather than stored in a field.
    this = TPositional(parent, _, name, offset) or this = TNamed(parent, _, name, offset)
  }

  /** Gets a textual representation of this element, which is the argument's text. */
  string toString() { result = name }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(int templateStartColumn |
      parent
          .getParent()
          .getTemplate()
          .getLocation()
          .hasLocationInfo(filepath, startline, templateStartColumn, _, _) and
      // `offset` is the position of this argument within the template text.
      startcolumn = templateStartColumn + offset and
      // The argument is reported on the template's start line only.
      endline = startline and
      endcolumn = startcolumn + name.length() - 1
    )
  }

  /** Gets the parent of this `FormatArgument`. */
  Format getParent() { result = parent }
}

/**
 * A format argument that is written as an integer, such as the `0` in `{0}` or
 * the `1` in `{:1$}`.
 */
class PositionalFormatArgument extends FormatArgument, TPositional {
  /** Gets the index of this positional argument, that is, its text parsed as an integer. */
  int getIndex() { name.toInt() = result }
}

/**
 * A format argument that is written as an identifier, such as the `value` in
 * `{value}` or the `width` in `{:width$}`.
 */
class NamedFormatArgument extends FormatArgument, TNamed {
  /** Gets the name of this named argument. */
  string getName() { name = result }
}

/**
* Gets a regular expression for matching format elements in a formatting template. The
* regular expression is generated from the following python code (each backslash in
* the generated expression is doubled again in the QL string literal below):
*
* ```python
* identifier = "([A-Za-z_][A-Za-z0-9_]*)"
* integer = "([0-9]+)"
*
* # argument := integer | identifier
* argument = "({integer}|{identifier})".format(integer=integer, identifier=identifier)
*
* # parameter := argument '$'
* parameter = "({argument}\\$)".format(argument=argument)
*
* # count := parameter | integer
* count = "({parameter}|{integer})".format(integer=integer, parameter=parameter)
*
* # fill := character
* fill = "(.)"
*
* # align := '<' | '^' | '>'
* align = "([<^>])"
*
* # sign := '+' | '-'
* sign = "([+-])"
*
* # width := count
* width = count
*
* # precision := count | '*'
* precision = "({count}|(\\*))".format(count=count)
*
* # type := '' | '?' | 'x?' | 'X?' | identifier
* type = "(|\\?|x\\?|X\\?|{identifier})".format(identifier=identifier)
*
* # format_spec := [[fill]align][sign]['#']['0'][width]['.' precision]type
* format_spec = "({fill}?{align})?{sign}?(#)?(0)?{width}?(\\.{precision})?{type}".format(fill=fill, align=align, sign=sign, width=width, precision=precision, type=type)
*
* # format := '{' [ argument ] [ ':' format_spec ] [ ws ] * '}'
* format = "(\\{{{argument}?(:{format_spec})?\\s*\\}})".format(argument=argument, format_spec=format_spec)
*
* ```
*
* The capture groups that `TFormatArgument` relies on are:
* - groups 3 and 4: the integer and identifier forms of the leading argument,
* - groups 15 and 16: the integer and identifier forms of a `width` parameter (`width$`),
* - groups 23 and 24: the integer and identifier forms of a `precision` parameter (`precision$`).
*
* Any change to the grouping of this expression must be mirrored in those group numbers.
*/
private string formatRegex() {
result =
"(\\{(([0-9]+)|([A-Za-z_][A-Za-z0-9_]*))?(:((.)?([<^>]))?([+-])?(#)?(0)?(((([0-9]+)|([A-Za-z_][A-Za-z0-9_]*))\\$)|([0-9]+))?(\\.((((([0-9]+)|([A-Za-z_][A-Za-z0-9_]*))\\$)|([0-9]+))|(\\*)))?(|\\?|x\\?|X\\?|([A-Za-z_][A-Za-z0-9_]*)))?\\s*\\})"
}

/**
 * Gets a regular expression matching a non-empty run of literal text in a
 * formatting template: characters other than `{` and `}`, or the escaped
 * braces `{{` and `}}`.
 */
private string textRegex() {
  result = "([^{}]|\\{\\{|\\}\\})+"
}

/**
 * Gets the `occurrenceIndex`th part (either a run of literal text or a format
 * element) of the formatting template of `parent`, where `occurrenceOffset` is
 * the position of that part within the template's text value.
 *
 * Only templates that are `LiteralExpr`s are considered.
 */
private string part(FormatArgsExpr parent, int occurrenceIndex, int occurrenceOffset) {
  exists(LiteralExpr template, string partRegex |
    template = parent.getTemplate() and
    partRegex = textRegex() + "|" + formatRegex() and
    // TODO: should also handle surrounding quotes and escaped characters
    result = template.getTextValue().regexpFind(partRegex, occurrenceIndex, occurrenceOffset)
  )
}

/**
 * The parts of a formatting template. Each part records the `FormatArgsExpr` it
 * belongs to, its text, its occurrence index among all parts of the template, and
 * its offset within the template's text value.
 */
private newtype TFormatTemplateElem =
  // A run of literal text (including escaped braces).
  TText(FormatArgsExpr parent, string text, int index, int offset) {
    text.regexpMatch(textRegex()) and
    text = part(parent, index, offset)
  } or
  // A format element such as `{}` or `{0:#1$.2$}`.
  TFormat(FormatArgsExpr parent, string text, int index, int offset) {
    text.regexpMatch(formatRegex()) and
    text = part(parent, index, offset)
  }

/**
* The role an argument plays inside a format element: the value being formatted
* (`TElement`), a `width$` parameter (`TWidth`), or a `precision$` parameter
* (`TPrecision`).
*/
private newtype TFormatArgumentKind =
TElement() or
TWidth() or
TPrecision()

/**
 * The arguments referenced from a format element. `TPositional` covers arguments
 * written as an integer and `TNamed` covers arguments written as an identifier.
 * `offset` is the position of the argument within the template's text value.
 *
 * The capture group numbers below refer to the groups of `formatRegex()`.
 */
private newtype TFormatArgument =
  TPositional(TFormat parent, TFormatArgumentKind kind, string value, int offset) {
    exists(string text, int formatOffset, int group |
      parent = TFormat(_, text, _, formatOffset) and
      value = text.regexpCapture(formatRegex(), group) and
      (
        // Group 3: integer argument directly after the opening `{`.
        kind = TElement() and
        group = 3 and
        offset = formatOffset + 1
        or
        // Group 15: integer in a `width$` parameter; the width precedes the
        // precision, so take the first occurrence of `value$`.
        kind = TWidth() and
        group = 15 and
        offset = formatOffset + min(text.indexOf(value + "$"))
        or
        // Group 23: integer in a `precision$` parameter; the precision follows
        // the width, so take the last occurrence of `value$`.
        kind = TPrecision() and
        group = 23 and
        offset = formatOffset + max(text.indexOf(value + "$"))
      )
    )
  } or
  TNamed(TFormat parent, TFormatArgumentKind kind, string name, int offset) {
    exists(string text, int formatOffset, int group |
      parent = TFormat(_, text, _, formatOffset) and
      name = text.regexpCapture(formatRegex(), group) and
      (
        // Group 4: identifier argument directly after the opening `{`.
        kind = TElement() and
        group = 4 and
        offset = formatOffset + 1
        or
        // Group 16: identifier in a `width$` parameter (first occurrence of `name$`).
        kind = TWidth() and
        group = 16 and
        offset = formatOffset + min(text.indexOf(name + "$"))
        or
        // Group 24: identifier in a `precision$` parameter (last occurrence of `name$`).
        kind = TPrecision() and
        group = 24 and
        offset = formatOffset + max(text.indexOf(name + "$"))
      )
    )
  }
}

0 comments on commit 5883a85

Please sign in to comment.