Adversaries may use Domain Generation Algorithms to create dynamic domains for command and control communication, which can evade traditional detection methods. SOC teams should proactively hunt for such behavior in Azure Sentinel to identify potential malware activity and disrupt C2 channels before they cause significant damage.
KQL Query
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris = allDataSummarized
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris = nonRepeatingTris
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend DestinationHostName = tolower(DestinationHostName)
| project-rename Name=DestinationHostName, DataSource=DeviceVendor
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
id: 4acd3a04-2fad-4efc-8a4b-51476594cec4
name: Possible contact with a domain generated by a DGA
description: |
'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.
This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.
The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.
The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.
NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'
severity: Medium
requiredDataConnectors:
- connectorId: Zscaler
dataTypes:
- CommonSecurityLog
- connectorId: Barracuda
dataTypes:
- CommonSecurityLog
- connectorId: CEF
dataTypes:
- CommonSecurityLog
- connectorId: CheckPoint
dataTypes:
- CommonSecurityLog
- connectorId: CiscoASA
dataTypes:
- CommonSecurityLog
- connectorId: F5
dataTypes:
- CommonSecurityLog
- connectorId: Fortinet
dataTypes:
- CommonSecurityLog
- connectorId: PaloAltoNetworks
dataTypes:
- CommonSecurityLog
queryFrequency: 6h
queryPeriod: 6h
triggerOperator: gt
triggerThreshold: 0
tactics:
- CommandAndControl
relevantTechniques:
- T1568
query: |
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
//
| Sentinel Table | Notes |
|---|---|
CommonSecurityLog | Ensure this data connector is enabled |
Scenario: Legitimate domain generation by a security tool
Description: A security tool like Microsoft Defender ATP or CrowdStrike Falcon may generate temporary domains for threat intelligence purposes.
Filter/Exclusion: Exclude domains that match known security tool domain patterns or include specific prefixes like threatintel. or s2s..
Scenario: Scheduled system maintenance job
Description: A scheduled job using PowerShell or Task Scheduler might generate temporary domains for internal monitoring or logging purposes.
Filter/Exclusion: Exclude domains that include timestamps or are associated with internal IP ranges (e.g., 10.0.0.0/8).
Scenario: Admin task using a domain generation algorithm for testing
Description: An administrator might use a script or tool like Python or PowerShell to simulate DGA traffic for testing purposes.
Filter/Exclusion: Exclude domains that are generated using known testing tools or scripts, or that contain specific test identifiers (e.g., test-dga-).
Scenario: Cloud service integration with dynamic domain resolution
Description: A cloud service like AWS Route 53 or Azure DNS might dynamically resolve domains for load balancing or DNS-based routing.
Filter/Exclusion: Exclude domains that are associated with known cloud providers or that match internal DNS zone patterns.
Scenario: Legitimate domain used by a third-party service
Description: A third-party service like Stripe or Twilio might use domain names that resemble DGA-generated domains.
Filter/Exclusion: Exclude domains that are registered to known third-party services or match their domain patterns (e.g., *.stripe.com, *.twilio.com).