Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* Modified SSRF tests to use postprocessing to more easily debug results.
* Added new full SSRF sanitization barrier from the new AntiSSRF library.
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,48 @@ module ServerSideRequestForgery {
strNode = [call.getArg(0), call.getArgByName("string")]
)
}

/** A validation that a URI belongs to an allowed domain, considered as a full-URL control sanitizer. */
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The doc comment above UriValidator appears to be copy/pasted from StringRestrictionSanitizerGuard and doesn’t match what follows (this is about URI/domain validation rather than restricting characters). Please update the comment so it accurately describes the sanitizer/guard being introduced.

Suggested change
/** A validation that a string does not contain certain characters, considered as a sanitizer. */
/** A validation that a URI belongs to an allowed domain, considered as a full-URL control sanitizer. */

Copilot uses AI. Check for mistakes.
private class UriValidator extends FullUrlControlSanitizer {
// The sanitized nodes are the barrier nodes that `DataFlow::BarrierGuard` derives from the
// `uri_validator` guard predicate.
UriValidator() { this = DataFlow::BarrierGuard<uri_validator/3>::getABarrierNode() }
}

import semmle.python.dataflow.new.internal.DataFlowPublic

/**
 * Holds if guard `g` validates `node` through a call to one of the `AntiSSRF`
 * `URIValidator` methods `in_domain`, `in_azure_keyvault_domain` or
 * `in_azure_storage_domain`, and the validator returned true when `g` evaluates
 * to `branch`.
 *
 * `node` is the first positional argument of the validator call. The guard `g` is
 * either the validator call itself, or a comparison (`==`, `!=`, `is`, `is not`)
 * of the validator's result against a boolean literal.
 */
private predicate uri_validator(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
exists(DataFlow::CallCfgNode call, string funcs |
funcs in ["in_domain", "in_azure_keyvault_domain", "in_azure_storage_domain"]
|
// `call` is a call to one of the listed validator methods; the checked value is its
// first positional argument.
call = API::moduleImport("AntiSSRF").getMember("URIValidator").getMember(funcs).getACall() and
call.getArg(0).asCfgNode() = node and
(
// validator used in a comparison
// (`n` is any node whose local source is the validator call, so the result may have
// been stored in a local variable before being compared)
exists(CompareNode cn, Cmpop op, Node n | cn = g and n.getALocalSource() = call |
(
// validator == true or validator == false or validator is True or validator is False
// (the validator passed when the comparison outcome equals the literal)
(op instanceof Eq or op instanceof Is) and
exists(ControlFlowNode l, boolean bool |
l.getNode().(BooleanLiteral).booleanValue() = bool and
bool in [true, false] and
branch = bool and
cn.operands(n.asCfgNode(), op, l)
)
or
// validator != false or validator != true or validator is not True or validator is not False
// (the validator passed when the comparison outcome is the negation of the literal)
(op instanceof NotEq or op instanceof IsNot) and
exists(ControlFlowNode l, boolean bool |
l.getNode().(BooleanLiteral).booleanValue() = bool and
bool in [true, false] and
branch = bool.booleanNot() and
cn.operands(n.asCfgNode(), op, l)
)
)
)
or
// validator call directly (e.g., if URIValidator.in_domain(...) )
g = call.asCfgNode() and
branch = true
)
)
}
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
Security/CWE-918/FullServerSideRequestForgery.ql
query: Security/CWE-918/FullServerSideRequestForgery.ql
postprocess: utils/test/InlineExpectationsTestQuery.ql

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
Security/CWE-918/PartialServerSideRequestForgery.ql
query: Security/CWE-918/PartialServerSideRequestForgery.ql
postprocess: utils/test/InlineExpectationsTestQuery.ql
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
from flask import request
from flask import request # $ Source

import requests
import requests
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s a trailing whitespace after requests on this import line; please remove it to keep the test file clean (especially since these files are heavily diffed for inline expectations).

Suggested change
import requests
import requests

Copilot uses AI. Check for mistakes.
import re

def full_ssrf():
user_input = request.args['untrusted_input']
query_val = request.args['query_val']

requests.get(user_input) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(user_input) # $ Alert[py/full-ssrf]

url = "https://" + user_input
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

# although the path `/foo` is added here, this can be circumvented such that the
# final URL is `https://evil.com/#/foo` -- since the fragment (#) is not sent to the
# server.
url = "https://" + user_input + "/foo"
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

# this might seem like a dummy test, but it serves to check how our sanitizers work.
url = "https://" + user_input + "/foo?key=" + query_val
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

# taint-steps are added as `fromNode -> toNode`, but when adding a sanitizer it's
# currently only possible to do so on either `fromNode` or `toNode` (either all edges in
Expand All @@ -39,72 +43,87 @@ def full_ssrf_format():

# using .format
url = "https://{}".format(user_input)
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = "https://{}/foo".format(user_input)
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = "https://{}/foo?key={}".format(user_input, query_val)
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = "https://{x}".format(x=user_input)
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = "https://{1}".format(0, user_input)
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

def full_ssrf_percent_format():
user_input = request.args['untrusted_input']
query_val = request.args['query_val']

# using %-formatting
url = "https://%s" % user_input
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = "https://%s/foo" % user_input
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = "https://%s/foo/key=%s" % (user_input, query_val)
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full and partial control
requests.get(url) # $ Alert[py/partial-ssrf] $ MISSING: Alert[py/full-ssrf]

def full_ssrf_f_strings():
user_input = request.args['untrusted_input']
query_val = request.args['query_val']

# using f-strings
url = f"https://{user_input}"
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = f"https://{user_input}/foo"
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]

url = f"https://{user_input}/foo?key={query_val}"
requests.get(url) # NOT OK -- user has full control
# NOT OK -- user has full control
requests.get(url) # $ Alert[py/full-ssrf]


def partial_ssrf_1():
user_input = request.args['untrusted_input']

url = "https://example.com/foo?" + user_input
requests.get(url) # NOT OK -- user controls query parameters
# NOT OK -- user controls query parameters
requests.get(url) # $ Alert[py/partial-ssrf]

def partial_ssrf_2():
user_input = request.args['untrusted_input']

url = "https://example.com/" + user_input
requests.get(url) # NOT OK -- user controls path
# NOT OK -- user controls path
requests.get(url) # $ Alert[py/partial-ssrf]

def partial_ssrf_3():
user_input = request.args['untrusted_input']

url = "https://example.com/" + user_input
requests.get(url) # NOT OK -- user controls path
# NOT OK -- user controls path
requests.get(url) # $ Alert[py/partial-ssrf]

def partial_ssrf_4():
user_input = request.args['untrusted_input']

url = "https://example.com/foo#{}".format(user_input)
requests.get(url) # NOT OK -- user contollred fragment
# NOT OK -- user controlled fragment
requests.get(url) # $ Alert[py/partial-ssrf]
Comment on lines 124 to +126
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling: "contollred" should be "controlled" (appears twice in this file).

Copilot uses AI. Check for mistakes.

def partial_ssrf_5():
user_input = request.args['untrusted_input']
Expand All @@ -113,20 +132,22 @@ def partial_ssrf_5():
# controlled

url = "https://example.com/foo#%s" % user_input
requests.get(url) # NOT OK -- user contollred fragment
# NOT OK -- user controlled fragment
requests.get(url) # $ Alert[py/partial-ssrf]
Comment on lines 134 to +136
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling: "contollred" should be "controlled".

Copilot uses AI. Check for mistakes.

def partial_ssrf_6():
user_input = request.args['untrusted_input']

url = f"https://example.com/foo#{user_input}"
requests.get(url) # NOT OK -- user only controlled fragment
# NOT OK -- user only controlled fragment
requests.get(url) # $ Alert[py/partial-ssrf]

def partial_ssrf_7():
user_input = request.args['untrusted_input']

if user_input.isalnum():
url = f"https://example.com/foo#{user_input}"
requests.get(url) # OK - user input can only contain alphanumerical characters
requests.get(url) # OK - user input can only contain alphanumerical characters

if user_input.isalpha():
url = f"https://example.com/foo#{user_input}"
Expand Down Expand Up @@ -154,7 +175,8 @@ def partial_ssrf_7():

if re.fullmatch(r'.*[a-zA-Z0-9]+.*', user_input):
url = f"https://example.com/foo#{user_input}"
requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary characters
# NOT OK, but NOT FOUND - user input can contain arbitrary characters
requests.get(url) # $ MISSING: Alert[py/partial-ssrf]


if re.match(r'^[a-zA-Z0-9]+$', user_input):
Expand All @@ -163,7 +185,8 @@ def partial_ssrf_7():

if re.match(r'[a-zA-Z0-9]+', user_input):
url = f"https://example.com/foo#{user_input}"
requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix.
# NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix.
requests.get(url) # $ MISSING: Alert[py/partial-ssrf]

reg = re.compile(r'^[a-zA-Z0-9]+$')

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from azure.keyvault.keys import KeyClient
from azure.storage.blob import ContainerClient
from azure.storage.blob import download_blob_from_url

from flask import request
from flask import request # $ Source

def azure_sdk_test(credential, output_path):
user_input = request.args['untrusted_input']
Expand All @@ -13,24 +12,14 @@ def azure_sdk_test(credential, output_path):
url = f"https://example.com/foo#{user_input}"
full_url = f"https://{user_input2}"
# Testing Azure sink
c = SecretClient(vault_url=url, credential=credential)# NOT OK -- user only controlled fragment
c = SecretClient(vault_url=full_url, credential=credential) # NOT OK -- user has full control
c = ShareFileClient.from_file_url(url) # NOT OK -- user only controlled fragment
c = ShareFileClient.from_file_url(full_url) # NOT OK -- user has full control
c = KeyClient(url, credential)# NOT OK -- user only controlled fragment
c = KeyClient(full_url, credential) # NOT OK -- user has full control
c = ContainerClient.from_container_url(container_url=url, credential=credential) # NOT OK -- user only controlled fragment
c = ContainerClient.from_container_url(container_url=full_url, credential=credential) # NOT OK -- user has full control
c = SecretClient(vault_url=url, credential=credential) # $ Alert[py/partial-ssrf]
c = SecretClient(vault_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
c = ShareFileClient.from_file_url(url) # $ Alert[py/partial-ssrf]
c = ShareFileClient.from_file_url(full_url) # $ Alert[py/full-ssrf]
c = KeyClient(url, credential) # $ Alert[py/partial-ssrf]
c = KeyClient(full_url, credential) # $ Alert[py/full-ssrf]
c = ContainerClient.from_container_url(container_url=url, credential=credential) # $ Alert[py/partial-ssrf]
c = ContainerClient.from_container_url(container_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
Comment on lines +15 to +22
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Variable c is not used.

Suggested change
c = SecretClient(vault_url=url, credential=credential) # $ Alert[py/partial-ssrf]
c = SecretClient(vault_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
c = ShareFileClient.from_file_url(url) # $ Alert[py/partial-ssrf]
c = ShareFileClient.from_file_url(full_url) # $ Alert[py/full-ssrf]
c = KeyClient(url, credential) # $ Alert[py/partial-ssrf]
c = KeyClient(full_url, credential) # $ Alert[py/full-ssrf]
c = ContainerClient.from_container_url(container_url=url, credential=credential) # $ Alert[py/partial-ssrf]
c = ContainerClient.from_container_url(container_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
SecretClient(vault_url=url, credential=credential) # $ Alert[py/partial-ssrf]
SecretClient(vault_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
ShareFileClient.from_file_url(url) # $ Alert[py/partial-ssrf]
ShareFileClient.from_file_url(full_url) # $ Alert[py/full-ssrf]
KeyClient(url, credential) # $ Alert[py/partial-ssrf]
KeyClient(full_url, credential) # $ Alert[py/full-ssrf]
ContainerClient.from_container_url(container_url=url, credential=credential) # $ Alert[py/partial-ssrf]
ContainerClient.from_container_url(container_url=full_url, credential=credential) # $ Alert[py/full-ssrf]

Copilot uses AI. Check for mistakes.
Comment on lines +15 to +22
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assignment to 'c' is unnecessary as it is redefined before this value is used.

This issue also appears in the following locations of the same file:

  • line 16
  • line 17
  • line 18
  • line 19
  • line 20
  • ...and 1 more
Suggested change
c = SecretClient(vault_url=url, credential=credential) # $ Alert[py/partial-ssrf]
c = SecretClient(vault_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
c = ShareFileClient.from_file_url(url) # $ Alert[py/partial-ssrf]
c = ShareFileClient.from_file_url(full_url) # $ Alert[py/full-ssrf]
c = KeyClient(url, credential) # $ Alert[py/partial-ssrf]
c = KeyClient(full_url, credential) # $ Alert[py/full-ssrf]
c = ContainerClient.from_container_url(container_url=url, credential=credential) # $ Alert[py/partial-ssrf]
c = ContainerClient.from_container_url(container_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
SecretClient(vault_url=url, credential=credential) # $ Alert[py/partial-ssrf]
SecretClient(vault_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
ShareFileClient.from_file_url(url) # $ Alert[py/partial-ssrf]
ShareFileClient.from_file_url(full_url) # $ Alert[py/full-ssrf]
KeyClient(url, credential) # $ Alert[py/partial-ssrf]
KeyClient(full_url, credential) # $ Alert[py/full-ssrf]
ContainerClient.from_container_url(container_url=url, credential=credential) # $ Alert[py/partial-ssrf]
ContainerClient.from_container_url(container_url=full_url, credential=credential) # $ Alert[py/full-ssrf]

Copilot uses AI. Check for mistakes.

download_blob_from_url(
blob_url=url, # NOT OK -- user only controlled fragment
output=output_path,
credential=credential,
overwrite=True
)
download_blob_from_url(
blob_url=full_url, # NOT OK -- user has full control
output=output_path,
credential=credential,
overwrite=True
)
download_blob_from_url(blob_url=url, output=output_path, credential=credential, overwrite=True ) # $ Alert[py/partial-ssrf]
download_blob_from_url(blob_url=full_url, output=output_path, credential=credential, overwrite=True) # $ Alert[py/full-ssrf]
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import Flask, request

from flask import request # $ Source
from flask import Flask
from http.client import HTTPConnection

app = Flask(__name__)
Expand All @@ -10,28 +10,35 @@ def ssrf_test():
unsafe_path = request.args["path"]
user_input = request.args['untrusted_input']

conn = HTTPConnection(unsafe_host)
conn.request("GET", unsafe_path) # NOT OK -- user has full control
conn = HTTPConnection(unsafe_host) # $ Sink[py/full-ssrf]
# NOT OK -- user has full control
conn.request("GET", unsafe_path) # $ Alert[py/full-ssrf]

# Full SSRF variant, where there is ALSO made a request with fixed URL on the same
# Full SSRF variant, where there is ALSO made a request with fixed URL on the same
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling: "AlSO" should be "also".

Suggested change
# Full SSRF variant, where there is AlSO made a request with fixed URL on the same
# Full SSRF variant, where there is also made a request with fixed URL on the same

Copilot uses AI. Check for mistakes.
# connection later on. This should not change anything on the overall SSRF alerts.
conn = HTTPConnection(unsafe_host)
conn.request("GET", unsafe_path) # NOT OK -- user has full control
conn = HTTPConnection(unsafe_host) # $ Sink
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sink annotation here is inconsistent with the other sink annotations in this file (# $ Sink[py/full-ssrf] / # $ Sink[py/partial-ssrf]). Please specify the kind for this sink as well so the inline expectations remain unambiguous.

Suggested change
conn = HTTPConnection(unsafe_host) # $ Sink
conn = HTTPConnection(unsafe_host) # $ Sink[py/full-ssrf]

Copilot uses AI. Check for mistakes.
# NOT OK -- user has full control
conn.request("GET", unsafe_path) # $ Alert[py/full-ssrf]

# partial SSRF on SAME connection
conn.request("GET", "/foo") # NOT OK -- user has control of host
# NOT OK -- user has control of host
conn.request("GET", "/foo") # $ Alert[py/partial-ssrf]

# the rest are partial SSRF
conn = HTTPConnection(unsafe_host)
conn.request("GET", "/foo") # NOT OK -- user controlled domain
conn = HTTPConnection(unsafe_host) # $ Sink[py/partial-ssrf]
# NOT OK -- user controlled domain
conn.request("GET", "/foo") # $ Alert[py/partial-ssrf]

conn = HTTPConnection("example.com")
conn.request("GET", unsafe_path) # NOT OK -- user controlled path
# NOT OK -- user controlled path
conn.request("GET", unsafe_path) # $ Alert[py/partial-ssrf]

path = "foo?" + user_input
conn = HTTPConnection("example.com")
conn.request("GET", path) # NOT OK -- user controlled query parameters
# NOT OK -- user controlled query parameters
conn.request("GET", path) # $ Alert[py/partial-ssrf]

path = "foo#" + user_input
conn = HTTPConnection("example.com")
conn.request("GET", path) # NOT OK -- user controlled fragment
# NOT OK -- user controlled fragment
conn.request("GET", path) # $ Alert[py/partial-ssrf]
Loading
Loading