Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e6dbd52
Add `RegexExecution` in `Concepts.qll`
owen-mc Feb 9, 2026
44eeee5
Add and improve classes for regex-related methods
owen-mc Feb 10, 2026
fa3fba4
Use new regex-related classes (no functional change)
owen-mc Feb 10, 2026
a22fd39
Use RegexExecution in sanitizer definitions (expands scope)
owen-mc Feb 10, 2026
1ee5728
Add missing QLDoc
owen-mc Feb 11, 2026
6a8204d
"dataflow" -> "data flow" in QLDoc
owen-mc Feb 11, 2026
d0999e3
Add failing test for @Pattern validation
owen-mc Feb 12, 2026
bfe26c1
Add @Pattern as RegexExecution => SSRF sanitizer
owen-mc Feb 12, 2026
c539c2f
Add change note
owen-mc Feb 12, 2026
5bdf550
Fix QLDocs
owen-mc Feb 12, 2026
106254b
Improve QLDocs
owen-mc Feb 13, 2026
953ff9f
PatternAnnotation.getString() should only be field reads
owen-mc Feb 13, 2026
1fefa98
Rename `RegexMatch` and only include expressions
owen-mc Feb 13, 2026
3c161f9
Make contract of RegexMatch clear
owen-mc Feb 13, 2026
c709958
Put imports implementing abstract classes in private module
owen-mc Feb 13, 2026
2e0f244
Improve QLDoc on `RegexMatch.getName()`
owen-mc Feb 13, 2026
ca4c988
Remove redundant variable
owen-mc Feb 13, 2026
90befa0
Add failing test for Matcher.matches() edge case
owen-mc Feb 14, 2026
8f8f4c2
Fix Matcher.matches edge case
owen-mc Feb 14, 2026
d6b71a3
Extend RegexMatch framework to allow for MatcherMatchesCall edge case
owen-mc Feb 15, 2026
16ddb56
Small refactor for stylistic consistency
owen-mc Feb 14, 2026
1f2ebf4
Add failing log injection test for @Pattern validation
owen-mc Feb 14, 2026
2b2f342
Refactor logInjectionGuard part 1
owen-mc Feb 14, 2026
a757178
Refactor logInjectionGuard part 2
owen-mc Feb 14, 2026
85959ea
Expand log injection sanitizer guards to non-annotation regex matches
owen-mc Feb 14, 2026
fa5bd75
Expand log injection sanitizers to annotation regex matches
owen-mc Feb 14, 2026
7c8119a
Rename `MethodCall ma` to `mc`
owen-mc Feb 14, 2026
34acc84
Add change note
owen-mc Feb 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* More ways of checking that a string matches a regular expression are now treated as sanitizers by various queries, including `java/ssrf` and `java/path-injection`. In particular, a value annotated with `@javax.validation.constraints.Pattern` is now recognised as sanitized by those queries.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Using a regular expression to check that a string doesn't contain any line breaks is already a sanitizer for `java/log-injection`. Additional ways of doing the regular expression check are now recognised, including annotation with `@javax.validation.constraints.Pattern`.
1 change: 1 addition & 0 deletions java/ql/lib/java.qll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import semmle.code.Unit
import semmle.code.java.Annotation
import semmle.code.java.Compilation
import semmle.code.java.CompilationUnit
import semmle.code.java.Concepts
import semmle.code.java.ControlFlowGraph
import semmle.code.java.Dependency
import semmle.code.java.Element
Expand Down
75 changes: 75 additions & 0 deletions java/ql/lib/semmle/code/java/Concepts.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/**
* Provides abstract classes representing generic concepts such as file system
* access or system command execution, for which individual framework libraries
* provide concrete subclasses.
*/
overlay[local?]
module;

import java

/**
 * A module importing the libraries that implement `RegexMatch::Range`,
 * ensuring that those implementations are visible to the concepts library
 * regardless of which other libraries a query happens to import.
 */
private module Frameworks {
  // Provides `StringMatchesCall`.
  private import semmle.code.java.JDK
  // Provides `PatternAnnotation`.
  private import semmle.code.java.frameworks.JavaxAnnotations
  // Provides `PatternMatchesCall` and `MatcherMatchesCall`.
  private import semmle.code.java.frameworks.Regex
}

/**
 * An expression that performs a regular expression match.
 *
 * To refine an existing API model, extend this class. To model a new API,
 * extend `RegexMatch::Range` instead.
 *
 * Such an expression is either a method call, which returns `true` when there
 * is a match, or an annotation, which is considered to match whenever it is
 * present.
 */
class RegexMatch extends Expr instanceof RegexMatch::Range {
  /**
   * Gets the name of this regex match, typically the name of the method that
   * performs it. The name is used in alert messages, so it should include the
   * type-qualified name where possible.
   */
  string getName() { result = super.getName() }

  /** Gets the expression for the regular expression used by this match. */
  Expr getRegex() { result = super.getRegex() }

  /** Gets an expression for the string that is searched or matched against. */
  Expr getString() { result = super.getString() }

  /**
   * Gets an expression that is sanitized by this match: either a matched
   * string, or an additional expression nominated by the underlying model.
   */
  Expr getASanitizedExpr() {
    result = this.getString()
    or
    result = super.getAdditionalSanitizedExpr()
  }
}

/** Provides classes for modeling regular-expression execution APIs. */
module RegexMatch {
  /**
   * An expression that executes a regular expression.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `RegexMatch` instead.
   *
   * These are either method calls, which return `true` when there is a match, or
   * annotations, which are considered to match if they are present.
   */
  abstract class Range extends Expr {
    /** Gets the expression for the regex being executed by this expression. */
    abstract Expr getRegex();

    /** Gets an expression for the string to be searched or matched against. */
    abstract Expr getString();

    /**
     * Gets an additional expression to be sanitized, if any.
     *
     * The default implementation has no results. Override it when a match
     * should sanitize something other than the matched string;
     * `RegexMatch.getASanitizedExpr()` reports these results alongside those
     * of `getString()`.
     */
    Expr getAdditionalSanitizedExpr() { none() }

    /**
     * Gets the name of this regex match, typically the name of an executing
     * method. This is used for nice alert messages and should include the
     * type-qualified name if possible.
     */
    abstract string getName();
  }
}
8 changes: 7 additions & 1 deletion java/ql/lib/semmle/code/java/JDK.qll
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,19 @@ class StringContainsMethod extends Method {
}

/** A call to the `java.lang.String.matches` method. */
class StringMatchesCall extends MethodCall {
class StringMatchesCall extends MethodCall, RegexMatch::Range {
  StringMatchesCall() {
    exists(Method callee |
      callee = this.getMethod() and
      callee.hasName("matches") and
      callee.getDeclaringType() instanceof TypeString
    )
  }

  /** Gets the name used for this kind of match in alert messages. */
  override string getName() { result = "String.matches" }

  /** Gets the regular expression, which is the first argument of the call. */
  override Expr getRegex() { result = this.getArgument(0) }

  /** Gets the string being matched, which is the qualifier of the call. */
  override Expr getString() { result = this.getQualifier() }
}

/** A call to the `java.lang.String.replaceAll` method. */
Expand Down
35 changes: 35 additions & 0 deletions java/ql/lib/semmle/code/java/frameworks/JavaxAnnotations.qll
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,38 @@ class WebServiceAnnotation extends Annotation {
class WebServiceRefAnnotation extends Annotation {
WebServiceRefAnnotation() { this.getType().hasQualifiedName("javax.xml.ws", "WebServiceRef") }
}

/*
 * Annotations in the packages `javax.validation.constraints` and
 * `jakarta.validation.constraints`.
 */

/**
 * A `@Pattern` annotation from either `javax.validation.constraints` or
 * `jakarta.validation.constraints`.
 *
 * The annotation is modeled as a regular expression match: any read of the
 * annotated element is treated as a string that matches the `regexp` value
 * of the annotation.
 */
class PatternAnnotation extends Annotation, RegexMatch::Range {
  PatternAnnotation() {
    this.getType()
        .hasQualifiedName(["javax.validation.constraints", "jakarta.validation.constraints"],
          "Pattern")
  }

  /** Gets the value of the `regexp` element of this annotation. */
  override Expr getRegex() { result = this.getValue("regexp") }

  /**
   * Gets an expression whose value is constrained by this annotation, that
   * is, a use of the annotated field, parameter or method.
   */
  override Expr getString() {
    // Annotation on field accessed by direct read - value of field will match regexp
    result.(FieldRead).getField() = this.getAnnotatedElement()
    or
    // Annotation on field accessed by getter - value of field will match regexp
    result.(MethodCall).getMethod().(GetterMethod).getField() = this.getAnnotatedElement()
    or
    // Annotation on parameter - value of parameter will match regexp
    result.(VarRead).getVariable().(Parameter) = this.getAnnotatedElement()
    or
    // Annotation on method - return value of method will match regexp
    result.(Call).getCallee() = this.getAnnotatedElement()
    // TODO - we could also consider the case where the annotation is on a type
    // but this is harder to model and not very common.
  }

  /**
   * Gets the name of this annotation for use in alert messages.
   *
   * The name is built from the actual annotation type, so `jakarta`
   * annotations are not reported under the `javax` package name.
   */
  override string getName() { result = "@" + this.getType().getQualifiedName() + " annotation" }
}
74 changes: 74 additions & 0 deletions java/ql/lib/semmle/code/java/frameworks/Regex.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ overlay[local?]
module;

import java
private import semmle.code.java.dataflow.DataFlow

/** The class `java.util.regex.Matcher`. */
class TypeRegexMatcher extends Class {
Expand All @@ -24,6 +25,16 @@ class TypeRegexPattern extends Class {
TypeRegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
}

/**
 * A method named `compile` declared on `java.util.regex.Pattern`.
 *
 * No restriction is placed on the parameters, so every overload is included.
 */
class PatternCompileMethod extends Method {
  PatternCompileMethod() {
    this.hasName("compile") and
    this.getDeclaringType() instanceof TypeRegexPattern
  }
}

/**
* The `matches` method of `java.util.regex.Pattern`.
*/
Expand Down Expand Up @@ -59,3 +70,66 @@ class PatternLiteralField extends Field {
this.hasName("LITERAL")
}
}

/** A method call whose target is the `compile` method of `java.util.regex.Pattern`. */
class PatternCompileCall extends MethodCall {
  PatternCompileCall() { exists(PatternCompileMethod callee | callee = this.getMethod()) }
}

/** A method call whose target is the `matcher` method of `java.util.regex.Pattern`. */
class PatternMatcherCall extends MethodCall {
  PatternMatcherCall() { exists(PatternMatcherMethod callee | callee = this.getMethod()) }
}

/**
 * A method call whose target is the `matches` method of
 * `java.util.regex.Pattern`, which takes the regular expression and the
 * input as its two arguments.
 */
class PatternMatchesCall extends MethodCall, RegexMatch::Range {
  PatternMatchesCall() { exists(PatternMatchesMethod callee | callee = this.getMethod()) }

  /** Gets the name used for this kind of match in alert messages. */
  override string getName() { result = "Pattern.matches" }

  /** Gets the regular expression, which is the first argument of the call. */
  override Expr getRegex() { result = this.getArgument(0) }

  /** Gets the string being matched, which is the second argument of the call. */
  override Expr getString() { result = this.getArgument(1) }
}

/**
 * A call to the `matches` method of `java.util.regex.Matcher`.
 *
 * Neither the regular expression nor the input string is an argument of this
 * call, so both are recovered by following local data flow backwards: first
 * to the `Pattern.matcher` call that created the matcher, then to the
 * `Pattern.compile` call that created the pattern.
 */
class MatcherMatchesCall extends MethodCall, RegexMatch::Range {
  MatcherMatchesCall() { this.getMethod() instanceof MatcherMatchesMethod }

  /**
   * Gets the call to `java.util.regex.Pattern.matcher` that returned the
   * qualifier of this call. This is needed to determine the string being
   * matched.
   */
  PatternMatcherCall getPatternMatcherCall() {
    DataFlow::localExprFlow(result, this.getQualifier())
  }

  /**
   * Gets the call to `java.util.regex.Pattern.compile` that returned the
   * `Pattern` used by this matcher. This is needed to determine the regular
   * expression being used.
   */
  PatternCompileCall getPatternCompileCall() {
    // The compiled `Pattern` flows to the qualifier of the `matcher` call.
    // The `matcher` call expression itself evaluates to a `Matcher`, which
    // the `Pattern` value never flows to.
    DataFlow::localExprFlow(result, this.getPatternMatcherCall().getQualifier())
  }

  /** Gets the regular expression, which is the first argument of the `Pattern.compile` call. */
  override Expr getRegex() { result = this.getPatternCompileCall().getArgument(0) }

  /** Gets the string being matched, which is the first argument of the `Pattern.matcher` call. */
  override Expr getString() { result = this.getPatternMatcherCall().getArgument(0) }

  /** Gets the matcher object this call is made on, which is sanitized in addition to the input string. */
  override Expr getAdditionalSanitizedExpr() {
    // Special case for MatcherMatchesCall. Consider the following code:
    //
    //     Matcher matcher = Pattern.compile(regexp).matcher(taintedInput);
    //     if (matcher.matches()) {
    //       sink(matcher.group(1));
    //     }
    //
    // Even though the string is `taintedInput`, we also want to sanitize
    // `matcher` as it can be used to get substrings of `taintedInput`.
    result = this.getQualifier()
  }

  /** Gets the name used for this kind of match in alert messages. */
  override string getName() { result = "Matcher.matches" }
}
103 changes: 57 additions & 46 deletions java/ql/lib/semmle/code/java/security/LogInjection.qll
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ private class LineBreaksLogInjectionSanitizer extends LogInjectionSanitizer {
}

private predicate stringMethodCall(
MethodCall ma, CompileTimeConstantExpr arg0, CompileTimeConstantExpr arg1
MethodCall mc, CompileTimeConstantExpr arg0, CompileTimeConstantExpr arg1
) {
ma.getMethod().getDeclaringType() instanceof TypeString and
arg0 = ma.getArgument(0) and
arg1 = ma.getArgument(1)
mc.getMethod().getDeclaringType() instanceof TypeString and
arg0 = mc.getArgument(0) and
arg1 = mc.getArgument(1)
}

private predicate stringMethodArgument(CompileTimeConstantExpr arg) {
Expand All @@ -64,22 +64,23 @@ private predicate stringMethodArgumentValueMatches(CompileTimeConstantExpr const
}

/**
* Holds if the return value of `ma` is sanitized against log injection attacks
* by removing line breaks from it.
* Holds if `e` is sanitized against log injection attacks by removing line
* breaks from it.
*/
private predicate logInjectionSanitizer(MethodCall ma) {
exists(CompileTimeConstantExpr target, CompileTimeConstantExpr replacement |
stringMethodCall(ma, target, replacement) and
private predicate logInjectionSanitizer(Expr e) {
exists(MethodCall mc, CompileTimeConstantExpr target, CompileTimeConstantExpr replacement |
e = mc and
stringMethodCall(mc, target, replacement) and
not stringMethodArgumentValueMatches(replacement, ["%\n%", "%\r%"])
|
ma.getMethod().hasName("replace") and
mc.getMethod().hasName("replace") and
not replacement.getIntValue() = [10, 13] and
(
target.getIntValue() = [10, 13] or // 10 == '\n', 13 == '\r'
target.getStringValue() = ["\n", "\r"]
)
or
ma.getMethod().hasName("replaceAll") and
mc.getMethod().hasName("replaceAll") and
(
// Replace anything not in an allow list
target.getStringValue().matches("[^%]") and
Expand All @@ -89,48 +90,58 @@ private predicate logInjectionSanitizer(MethodCall ma) {
target.getStringValue() = ["\n", "\r", "\\n", "\\r", "\\R"]
)
)
or
exists(RegexMatch rm, CompileTimeConstantExpr target |
rm instanceof Annotation and
e = rm.getASanitizedExpr() and
target = rm.getRegex() and
regexPreventsLogInjection(target.getStringValue(), true)
)
}

/**
* Holds if `g` guards `e` in branch `branch` against log injection attacks
* by checking if there are line breaks in `e`.
*/
private predicate logInjectionGuard(Guard g, Expr e, boolean branch) {
exists(MethodCall ma, CompileTimeConstantExpr target |
ma = g and
target = ma.getArgument(0)
|
ma.getMethod().getDeclaringType() instanceof TypeString and
ma.getMethod().hasName("contains") and
target.getStringValue() = ["\n", "\r"] and
e = ma.getQualifier() and
exists(MethodCall mc | mc = g |
mc.getMethod() instanceof StringContainsMethod and
mc.getArgument(0).(CompileTimeConstantExpr).getStringValue() = ["\n", "\r"] and
e = mc.getQualifier() and
branch = false
or
ma.getMethod().hasName("matches") and
(
ma.getMethod().getDeclaringType() instanceof TypeString and
e = ma.getQualifier()
or
ma.getMethod().getDeclaringType().hasQualifiedName("java.util.regex", "Pattern") and
e = ma.getArgument(1)
) and
(
// Allow anything except line breaks
(
not target.getStringValue().matches("%[^%]%") and
not target.getStringValue().matches("%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%")
or
target.getStringValue().matches("%[^%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%]%")
) and
branch = true
or
// Disallow line breaks
(
not target.getStringValue().matches("%[^%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%]%") and
// Assuming a regex containing line breaks is correctly matching line breaks in a string
target.getStringValue().matches("%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%")
) and
branch = false
)
)
or
exists(RegexMatch rm, CompileTimeConstantExpr target |
rm = g and
not rm instanceof Annotation and
target = rm.getRegex() and
e = rm.getASanitizedExpr()
|
regexPreventsLogInjection(target.getStringValue(), branch)
)
}

/**
 * Holds if the outcome `branch` of matching a string against `regex`
 * indicates that the string contains no line breaks.
 *
 * - `branch = true`: the regex is taken to accept only strings without line
 *   breaks. Either it has a negated character class that mentions a line
 *   break, or it has no negated character class and mentions no line break.
 * - `branch = false`: the regex is taken to look for line breaks. It mentions
 *   a line break, and not inside a negated character class.
 *
 * This is a textual heuristic over the regex source, not a full parse.
 */
bindingset[regex]
private predicate regexPreventsLogInjection(string regex, boolean branch) {
  branch = true and
  (
    // A negated character class that excludes a line break
    regex.matches("%[^%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%]%")
    or
    // No negated character class and no line break anywhere in the regex
    not regex.matches("%[^%]%") and
    not regex.matches("%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%")
  )
  or
  branch = false and
  // Assuming a regex containing line breaks is correctly matching line breaks in a string
  regex.matches("%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%") and
  not regex.matches("%[^%" + ["\n", "\r", "\\n", "\\r", "\\R"] + "%]%")
}
Loading