Adversaries may use Domain Generation Algorithms to establish covert command and control channels, leveraging dynamically generated hostnames to evade detection. SOC teams should proactively hunt for this behavior in Azure Sentinel to identify potential malware communication and disrupt C2 infrastructure early.
KQL Query
let triThreshold = 500;
let querystarttime = 6h;
let dgaLengthThreshold = 8;
// fetch the cisco umbrella top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams to typeof(string)
| summarize triCount=count() by Trigram
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = _Im_WebSession
| where isnotempty(Url)
| extend Name = tolower(tostring(parse_url(Url)["Host"]))
| summarize NameCount=count() by Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris = allDataSummarized
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris = nonRepeatingTris
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
_Im_WebSession
| where isnotempty(Url)
| extend Url = tolower(Url)
| summarize arg_max(TimeGenerated, EventVendor, SrcIpAddr) by Url
| extend Name=tostring(parse_url(Url)["Host"])
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SrcIpAddr, Url
)
on Name
| project StartTime, EndTime, Name, DGADomain, SrcIpAddr, Url, NameCount
id: 9176b18f-a946-42c6-a2f6-0f6d17cd6a8a
name: Potential communication with a Domain Generation Algorithm (DGA) based hostname (ASIM Web Session schema)
description: |
'This rule identifies communication with hosts that have a domain name that might have been generated by a Domain Generation Algorithm (DGA).
DGAs are used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the top 1 million domain names to build a model of what normal domains look like nad uses the model to identify domains that may have been randomly generated by an algorithm. You can modify the triThreshold and dgaLengthThreshold query parameters to change Analytic Rule sensitivity. The higher the numbers, the less noisy the rule is.
This analytic rule uses [ASIM](https://aka.ms/AboutASIM) and supports any built-in or custom source that supports the ASIM WebSession schema (ASIM WebSession Schema)'
severity: Medium
requiredDataConnectors:
- connectorId: SquidProxy
dataTypes:
- SquidProxy_CL
- connectorId: Zscaler
dataTypes:
- CommonSecurityLog
queryFrequency: 6h
queryPeriod: 6h
triggerOperator: gt
triggerThreshold: 0
tactics:
- CommandAndControl
tags:
- ParentAlert: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
version: 1.0.0
- Schema: ASIMWebSession
SchemaVersion: 0.2.0
relevantTechniques:
- T1568
query: |
let triThreshold = 500;
let querystarttime = 6h;
let dgaLengthThreshold = 8;
// fetch the cisco umbrella top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams to typeof(string)
| summarize triCount=count() by Trigram
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = _Im_WebSession
| where isnotempty(Url)
| extend Name = tolower(tostring(parse_url(Url)["Host"]))
| summarize NameCount=count() by Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = arra
Scenario: Legitimate scheduled job using a DGA-like hostname for internal service discovery
Example: A scheduled job in Ansible or SaltStack uses a dynamically generated hostname for internal service discovery.
Filter/Exclusion: Exclude traffic to hosts matching the internal service discovery domain pattern (e.g., *.svc.cluster.local or *.internal).
Scenario: Admin task using a DGA-like hostname for testing purposes
Example: A system administrator tests a new DNS configuration by using a DGA-like hostname in a test script or PowerShell script.
Filter/Exclusion: Exclude traffic originating from known admin workstations or user accounts with elevated privileges (e.g., user@admin-workstation or uid=1000).
Scenario: Communication with a legitimate third-party service using a DGA-like domain
Example: A service like Cloudflare or AWS Route 53 might use a DGA-like domain for internal routing or load balancing.
Filter/Exclusion: Exclude traffic to known third-party services or domains listed in the enterprise’s trusted domain list (e.g., *.cloudflare.com, *.aws.amazon.com).
Scenario: False positive from a legitimate application using a DGA-like hostname for dynamic DNS
Example: An application like Docker or Kubernetes uses a DGA-like hostname for dynamic DNS resolution in a containerized environment.
Filter/Exclusion: Exclude traffic from containers or services identified by their container ID or namespace (e.g., container_id=abc123 or ns=kube-system).
Scenario: Legitimate use of a DGA-like hostname in a security tool for simulation
Example: A security tool like Metasploit or **Cobalt