From 22255ede65ba67b3d855dc4145b2e6cd479b681d Mon Sep 17 00:00:00 2001
From: Mzack9999 <mzack9999@protonmail.com>
Date: Mon, 27 Apr 2026 11:29:22 +0800
Subject: [PATCH 1/3] adding per-host rate limit

---
 cmd/nuclei/main.go                            |   2 +
 internal/runner/runner.go                     |  16 +
 internal/server/nuclei_sdk.go                 |   3 +
 lib/config.go                                 |  18 +
 lib/multi.go                                  |  40 ++-
 lib/sdk.go                                    |   4 +
 lib/sdk_private.go                            |  13 +
 .../common/hostratelimit/hostratelimit.go     | 258 ++++++++++++++
 .../hostratelimit/hostratelimit_perf_test.go  | 336 ++++++++++++++++++
 .../hostratelimit/hostratelimit_test.go       | 238 +++++++++++++
 pkg/protocols/dns/request.go                  |   2 +-
 pkg/protocols/http/request.go                 |  25 +-
 pkg/protocols/http/request_fuzz.go            |   2 +-
 pkg/protocols/protocols.go                    |  23 ++
 pkg/types/types.go                            |   8 +
 15 files changed, 973 insertions(+), 15 deletions(-)
 create mode 100644 pkg/protocols/common/hostratelimit/hostratelimit.go
 create mode 100644 pkg/protocols/common/hostratelimit/hostratelimit_perf_test.go
 create mode 100644 pkg/protocols/common/hostratelimit/hostratelimit_test.go

diff --git a/cmd/nuclei/main.go b/cmd/nuclei/main.go
index f4cb0280c2..e340b4bab7 100644
--- a/cmd/nuclei/main.go
+++ b/cmd/nuclei/main.go
@@ -419,6 +419,8 @@ on extensive configurability, massive extensibility and ease of use.`)
 		flagSet.IntVarP(&options.RateLimit, "rate-limit", "rl", 150, "maximum number of requests to send per second"),
 		flagSet.DurationVarP(&options.RateLimitDuration, "rate-limit-duration", "rld", time.Second, "maximum number of requests to send per second"),
 		flagSet.IntVarP(&options.RateLimitMinute, "rate-limit-minute", "rlm", 0, "maximum number of requests to send per minute (DEPRECATED)"),
+		flagSet.IntVarP(&options.RateLimitHost, "rate-limit-host", "rlh", 0, "maximum number of requests to send per host per rate-limit-host-duration (0 = disabled)"),
+		flagSet.DurationVarP(&options.RateLimitHostDuration, "rate-limit-host-duration", "rlhd", time.Second, "refill interval for the per-host rate limit bucket"),
 		flagSet.IntVarP(&options.BulkSize, "bulk-size", "bs", 25, "maximum number of hosts to be analyzed in parallel per template"),
 		flagSet.IntVarP(&options.TemplateThreads, "concurrency", "c", 25, "maximum number of templates to be executed in parallel"),
 		flagSet.IntVarP(&options.HeadlessBulkSize, "headless-bulk-size", "hbs", 10, "maximum number of headless hosts to be analyzed in parallel per template"),
diff --git a/internal/runner/runner.go b/internal/runner/runner.go
index 3b4ab1fb1b..0abcd5b0bb 100644
--- a/internal/runner/runner.go
+++ b/internal/runner/runner.go
@@ -52,6 +52,7 @@ import (
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/globalmatchers"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hosterrorscache"
+	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hostratelimit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolinit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/honeypotdetector"
@@ -89,6 +90,7 @@ type Runner struct {
 	issuesClient       reporting.Client
 	browser            *engine.Browser
 	rateLimiter        *ratelimit.Limiter
+	hostRateLimiter    *hostratelimit.Pool
 	hostErrors         hosterrorscache.CacheInterface
 	resumeCfg          *types.ResumeCfg
 	pprofServer        *pprofutil.PprofServer
@@ -406,6 +408,17 @@ func New(options *types.Options) (*Runner, error) {
 	}
 	runner.rateLimiter = utils.GetRateLimiter(context.Background(), options.RateLimit, options.RateLimitDuration)
 
+	if options.RateLimitHost > 0 {
+		hostDuration := options.RateLimitHostDuration
+		if hostDuration == 0 {
+			hostDuration = time.Second
+		}
+		runner.hostRateLimiter = hostratelimit.NewPool(context.Background(), hostratelimit.Options{
+			MaxCount: uint(options.RateLimitHost),
+			Duration: hostDuration,
+		})
+	}
+
 	// Initialization successful, disable cleanup on error
 	cleanupOnError = false
 	return runner, nil
@@ -454,6 +467,7 @@ func (r *Runner) Close() {
 	if r.rateLimiter != nil {
 		r.rateLimiter.Stop()
 	}
+	r.hostRateLimiter.Stop()
 	r.progress.Stop()
 	if r.browser != nil {
 		r.browser.Close()
@@ -517,6 +531,7 @@ func (r *Runner) RunEnumeration() error {
 			Catalog:            r.catalog,
 			IssuesClient:       r.issuesClient,
 			RateLimiter:        r.rateLimiter,
+			HostRateLimiter:    r.hostRateLimiter,
 			Interactsh:         r.interactsh,
 			ProjectFile:        r.projectFile,
 			Browser:            r.browser,
@@ -573,6 +588,7 @@ func (r *Runner) RunEnumeration() error {
 		Catalog:             r.catalog,
 		IssuesClient:        r.issuesClient,
 		RateLimiter:         r.rateLimiter,
+		HostRateLimiter:     r.hostRateLimiter,
 		Interactsh:          r.interactsh,
 		ProjectFile:         r.projectFile,
 		Browser:             r.browser,
diff --git a/internal/server/nuclei_sdk.go b/internal/server/nuclei_sdk.go
index 1f6a3dfe25..7898718631 100644
--- a/internal/server/nuclei_sdk.go
+++ b/internal/server/nuclei_sdk.go
@@ -29,6 +29,7 @@ import (
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/globalmatchers"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hosterrorscache"
+	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hostratelimit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/utils/excludematchers"
 	browserEngine "github.com/projectdiscovery/nuclei/v3/pkg/protocols/headless/engine"
@@ -51,6 +52,7 @@ type NucleiExecutorOptions struct {
 	Catalog            catalog.Catalog
 	IssuesClient       reporting.Client
 	RateLimiter        *ratelimit.Limiter
+	HostRateLimiter    *hostratelimit.Pool
 	Interactsh         *interactsh.Client
 	ProjectFile        *projectfile.ProjectFile
 	Browser            *browserEngine.Browser
@@ -74,6 +76,7 @@ func newNucleiExecutor(opts *NucleiExecutorOptions) (*nucleiExecutor, error) {
 		Catalog:             opts.Catalog,
 		IssuesClient:        opts.IssuesClient,
 		RateLimiter:         opts.RateLimiter,
+		HostRateLimiter:     opts.HostRateLimiter,
 		Interactsh:          opts.Interactsh,
 		ProjectFile:         opts.ProjectFile,
 		Browser:             opts.Browser,
diff --git a/lib/config.go b/lib/config.go
index b19e99faf1..2885459d75 100644
--- a/lib/config.go
+++ b/lib/config.go
@@ -187,6 +187,24 @@ func WithGlobalRateLimitCtx(ctx context.Context, maxTokens int, duration time.Du
 	}
 }
 
+// WithHostRateLimit configures per-host rate limiting on top of the global
+// rate limit: every distinct target host is allowed at most maxTokens
+// requests per duration. A maxTokens of 0 (the default) turns it off.
+//
+// It works alongside WithGlobalRateLimit. A request first takes a token from
+// the global limiter and then one from the per-host limiter, so overall scan
+// throughput stays bounded by the global limit while no single target can
+// be flooded. ctx is not read by this option.
+func WithHostRateLimit(ctx context.Context, maxTokens int, duration time.Duration) NucleiSDKOptions {
+	return func(engine *NucleiEngine) error {
+		// Only the requested values are stored here; the limiter itself is
+		// built later, under whichever ctx the engine ends up running with.
+		engine.opts.RateLimitHostDuration = duration
+		engine.opts.RateLimitHost = maxTokens
+		return nil
+	}
+}
+
 // HeadlessOpts contains options for headless templates
 type HeadlessOpts struct {
 	PageTimeout     int // timeout for page load
diff --git a/lib/multi.go b/lib/multi.go
index 535eca0cb0..18c4cfefe7 100644
--- a/lib/multi.go
+++ b/lib/multi.go
@@ -11,6 +11,7 @@ import (
 	"github.com/projectdiscovery/nuclei/v3/pkg/loader/workflow"
 	"github.com/projectdiscovery/nuclei/v3/pkg/output"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols"
+	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hostratelimit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/types"
 	"github.com/projectdiscovery/nuclei/v3/pkg/utils"
 	"github.com/projectdiscovery/utils/errkit"
@@ -29,17 +30,18 @@ type unsafeOptions struct {
 func createEphemeralObjects(ctx context.Context, base *NucleiEngine, opts *types.Options) (*unsafeOptions, error) {
 	u := &unsafeOptions{}
 	u.executerOpts = &protocols.ExecutorOptions{
-		Output:       base.customWriter,
-		Options:      opts,
-		Progress:     base.customProgress,
-		Catalog:      base.catalog,
-		IssuesClient: base.rc,
-		RateLimiter:  base.rateLimiter,
-		Interactsh:   base.interactshClient,
-		Colorizer:    aurora.NewAurora(true),
-		ResumeCfg:    types.NewResumeCfg(),
-		Parser:       base.parser,
-		Browser:      base.browserInstance,
+		Output:          base.customWriter,
+		Options:         opts,
+		Progress:        base.customProgress,
+		Catalog:         base.catalog,
+		IssuesClient:    base.rc,
+		RateLimiter:     base.rateLimiter,
+		HostRateLimiter: base.hostRateLimiter,
+		Interactsh:      base.interactshClient,
+		Colorizer:       aurora.NewAurora(true),
+		ResumeCfg:       types.NewResumeCfg(),
+		Parser:          base.parser,
+		Browser:         base.browserInstance,
 	}
 	if opts.ShouldUseHostError() && base.hostErrCache != nil {
 		u.executerOpts.HostErrorsCache = base.hostErrCache
@@ -52,6 +54,21 @@ func createEphemeralObjects(ctx context.Context, base *NucleiEngine, opts *types
 		opts.RateLimitDuration = time.Second
 	}
 	u.executerOpts.RateLimiter = utils.GetRateLimiter(ctx, opts.RateLimit, opts.RateLimitDuration)
+
+	// Per-call ephemeral host rate limiter; its goroutine cost is paid once per
+	// ExecuteNucleiWithOpts call and it is Stop()-ed in closeEphemeralObjects.
+	// NOTE(review): with RateLimitHost == 0 base's shared pool is stopped too?
+	if opts.RateLimitHost > 0 {
+		hostDuration := opts.RateLimitHostDuration
+		if hostDuration == 0 {
+			hostDuration = time.Second
+		}
+		u.executerOpts.HostRateLimiter = hostratelimit.NewPool(ctx, hostratelimit.Options{
+			MaxCount: uint(opts.RateLimitHost),
+			Duration: hostDuration,
+		})
+	}
+
 	u.engine = core.New(opts)
 	u.engine.SetExecuterOptions(u.executerOpts)
 	return u, nil
@@ -62,6 +79,7 @@ func closeEphemeralObjects(u *unsafeOptions) {
 	if u.executerOpts.RateLimiter != nil {
 		u.executerOpts.RateLimiter.Stop()
 	}
+	u.executerOpts.HostRateLimiter.Stop()
 	// dereference all objects that were inherited from base nuclei engine
 	// since these are meant to be closed globally by base nuclei engine
 	u.executerOpts.Output = nil
diff --git a/lib/sdk.go b/lib/sdk.go
index 4f1740b452..4ba73ba0de 100644
--- a/lib/sdk.go
+++ b/lib/sdk.go
@@ -21,6 +21,7 @@ import (
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/generators"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hosterrorscache"
+	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hostratelimit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolinit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/headless/engine"
@@ -72,6 +73,7 @@ type NucleiEngine struct {
 	interactshClient *interactsh.Client
 	catalog          catalog.Catalog
 	rateLimiter      *ratelimit.Limiter
+	hostRateLimiter  *hostratelimit.Pool
 	store            *loader.Store
 	httpxClient      providerTypes.InputLivenessProbe
 	inputProvider    provider.InputProvider
@@ -228,6 +230,8 @@ func (e *NucleiEngine) closeInternal() {
 	if e.rateLimiter != nil {
 		e.rateLimiter.Stop()
 	}
+	e.executerOpts.HostRateLimiter.Stop()
+	e.hostRateLimiter.Stop()
 	if e.inputProvider != nil {
 		e.inputProvider.Close()
 	}
diff --git a/lib/sdk_private.go b/lib/sdk_private.go
index 9f427dd0ba..2d60c7dcb3 100644
--- a/lib/sdk_private.go
+++ b/lib/sdk_private.go
@@ -26,6 +26,7 @@ import (
 	"github.com/projectdiscovery/nuclei/v3/pkg/progress"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hosterrorscache"
+	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hostratelimit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolinit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolstate"
@@ -101,6 +102,17 @@ func (e *NucleiEngine) applyRequiredDefaults(ctx context.Context) {
 		e.rateLimiter = nucleiUtils.GetRateLimiter(ctx, e.opts.RateLimit, e.opts.RateLimitDuration)
 	}
 
+	if e.hostRateLimiter == nil && e.opts.RateLimitHost > 0 {
+		hostDuration := e.opts.RateLimitHostDuration
+		if hostDuration == 0 {
+			hostDuration = time.Second
+		}
+		e.hostRateLimiter = hostratelimit.NewPool(ctx, hostratelimit.Options{
+			MaxCount: uint(e.opts.RateLimitHost),
+			Duration: hostDuration,
+		})
+	}
+
 	if e.opts.ExcludeTags == nil {
 		e.opts.ExcludeTags = []string{}
 	}
@@ -212,6 +224,7 @@ func (e *NucleiEngine) init(ctx context.Context) error {
 		Catalog:            e.catalog,
 		IssuesClient:       e.rc,
 		RateLimiter:        e.rateLimiter,
+		HostRateLimiter:    e.hostRateLimiter,
 		Interactsh:         e.interactshClient,
 		Colorizer:          aurora.NewAurora(true),
 		ResumeCfg:          types.NewResumeCfg(),
diff --git a/pkg/protocols/common/hostratelimit/hostratelimit.go b/pkg/protocols/common/hostratelimit/hostratelimit.go
new file mode 100644
index 0000000000..252e30542b
--- /dev/null
+++ b/pkg/protocols/common/hostratelimit/hostratelimit.go
@@ -0,0 +1,258 @@
+// Package hostratelimit provides a per-host rate limiter pool with bounded
+// eviction, mirroring the per-host HTTP client pool model.
+//
+// Each host gets its own *ratelimit.Limiter on first use, lazily. Idle entries
+// are reclaimed by a background sweep, and a hard cap evicts the least-recently
+// used entry when the pool grows beyond MaxHosts so worst-case memory is
+// bounded on long-running scans across many targets.
+//
+// Unlike the *retryablehttp.Client pool, each *ratelimit.Limiter runs an
+// internal goroutine, so eviction MUST call Stop() to release it - otherwise
+// a long scan that touches many short-lived hostnames would leak one
+// goroutine per unique host. mapsutil.SyncLockMap.WithEviction has no
+// on-evict callback, hence this small bespoke pool.
+package hostratelimit
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/projectdiscovery/ratelimit"
+)
+
+const (
+	// DefaultMaxHosts is the default upper bound on pool size, bounding
+	// worst-case memory on huge input sets; inserting past it evicts the LRU entry.
+	DefaultMaxHosts = 4096
+	// DefaultSweepInterval is the default period of the background sweep.
+	DefaultSweepInterval = 30 * time.Second
+	// DefaultInactivity is the default idle time after which the background
+	// sweep reclaims a per-host limiter.
+	DefaultInactivity = 90 * time.Second
+)
+
+// Options configures a Pool.
+type Options struct {
+	// MaxCount is the bucket size for each per-host limiter (e.g. 60).
+	// Required: a Pool with MaxCount == 0 is treated as disabled and
+	// Take/Get become no-ops.
+	MaxCount uint
+	// Duration is the bucket refill interval (e.g. time.Minute). Must be > 0:
+	// zero or negative values disable the pool, as no ticker can use them.
+	Duration time.Duration
+	// Inactivity controls how long an idle per-host limiter is retained
+	// before the sweep reclaims it. Values <= 0 select DefaultInactivity.
+	Inactivity time.Duration
+	// SweepInterval controls how often the background sweep runs.
+	// Values <= 0 select DefaultSweepInterval.
+	SweepInterval time.Duration
+	// MaxHosts caps the number of live limiters retained at once. When
+	// an insert would exceed this size, the LRU entry is evicted (and
+	// Stop()-ed) first. Values <= 0 select DefaultMaxHosts.
+	MaxHosts int
+}
+
+// Disabled reports whether o yields a nil no-op pool (MaxCount == 0 or Duration <= 0).
+func (o Options) Disabled() bool {
+	return o.MaxCount == 0 || o.Duration <= 0
+}
+
+func (o *Options) applyDefaults() {
+	if o.MaxHosts < 1 {
+		o.MaxHosts = DefaultMaxHosts
+	}
+	if o.SweepInterval < 1 {
+		o.SweepInterval = DefaultSweepInterval
+	}
+	if o.Inactivity < 1 {
+		o.Inactivity = DefaultInactivity
+	}
+}
+
+// Pool is a per-host rate limiter pool.
+//
+// The zero value is not usable; use NewPool. A nil *Pool is valid and behaves
+// as a no-op limiter (its exported methods check for nil), so callers can use
+// the same code path whether or not per-host limiting is enabled.
+type Pool struct {
+	ctx    context.Context    // pool lifetime; handed to every per-host limiter
+	cancel context.CancelFunc // cancels ctx, which ends sweepLoop
+	opts   Options            // options with defaults applied by NewPool
+
+	mu      sync.Mutex        // guards entries and stoppedLimiters
+	entries map[string]*entry // per-host state, keyed by host
+	// stoppedLimiters is incremented every time a per-host limiter is
+	// retired (idle sweep, LRU eviction or shutdown). Exposed for tests;
+	// the production caller does not need it.
+	stoppedLimiters uint64
+
+	stopOnce sync.Once     // makes Stop idempotent
+	doneCh   chan struct{} // closed when sweepLoop returns
+}
+
+type entry struct {
+	limiter    *ratelimit.Limiter // token bucket dedicated to one host
+	lastAccess time.Time          // last getOrCreate hit; drives idle and LRU eviction
+}
+
+// NewPool builds a Pool bound to ctx and starts its background sweep. When
+// opts.Disabled() is true it returns nil, which is a valid no-op Pool.
+func NewPool(ctx context.Context, opts Options) *Pool {
+	if opts.Disabled() {
+		return nil
+	}
+	opts.applyDefaults()
+
+	pool := &Pool{
+		opts:    opts,
+		entries: map[string]*entry{},
+		doneCh:  make(chan struct{}),
+	}
+	// Derive a cancellable context so Stop can end the sweep goroutine.
+	pool.ctx, pool.cancel = context.WithCancel(ctx)
+	go pool.sweepLoop()
+
+	return pool
+}
+
+// Take obtains one token from the limiter belonging to host, creating that
+// limiter on first use. It is a no-op when host is empty or the pool is
+// nil/disabled.
+func (p *Pool) Take(host string) {
+	if host == "" || p == nil {
+		return
+	}
+	if limiter := p.getOrCreate(host); limiter != nil {
+		limiter.Take()
+	}
+}
+
+// Get hands back the *ratelimit.Limiter dedicated to host, creating it when
+// host is seen for the first time. A nil/disabled pool or an empty host
+// yields nil.
+//
+// Take(host) covers lookup plus acquisition and suits most callers; Get is
+// for tests and the rare caller needing the raw limiter (e.g. GetLimit()).
+func (p *Pool) Get(host string) *ratelimit.Limiter {
+	if p != nil && host != "" {
+		return p.getOrCreate(host)
+	}
+	return nil
+}
+
+// Len reports how many per-host limiters are live; a nil Pool reports 0.
+func (p *Pool) Len() int {
+	var count int
+	if p != nil {
+		p.mu.Lock()
+		count = len(p.entries)
+		p.mu.Unlock()
+	}
+	return count
+}
+
+// Stop ends the background sweep, then stops and removes every per-host
+// limiter. Calling it more than once, or on a nil Pool, is safe.
+func (p *Pool) Stop() {
+	if p == nil {
+		return
+	}
+	p.stopOnce.Do(func() {
+		// End the sweep goroutine first and wait until it has exited.
+		p.cancel()
+		<-p.doneCh
+		p.mu.Lock()
+		defer p.mu.Unlock()
+		for host, ent := range p.entries {
+			ent.limiter.Stop()
+			delete(p.entries, host)
+			p.stoppedLimiters++
+		}
+	})
+}
+
+// getOrCreate returns the limiter tracked for host and refreshes its
+// last-access time, or builds and registers a new limiter when host is not
+// tracked yet. If the pool is already at opts.MaxHosts, the least recently
+// used entry is removed first and its limiter stopped. Callers are expected
+// to have rejected an empty host already.
+func (p *Pool) getOrCreate(host string) *ratelimit.Limiter {
+	now := time.Now()
+
+	p.mu.Lock()
+	if existing, found := p.entries[host]; found {
+		existing.lastAccess = now
+		limiter := existing.limiter
+		p.mu.Unlock()
+		return limiter
+	}
+
+	// The map must never hold more than MaxHosts entries, so make room
+	// before inserting. The victim is picked while locked but stopped only
+	// after unlocking, which keeps the critical section short.
+	var evicted *ratelimit.Limiter
+	if len(p.entries) >= p.opts.MaxHosts {
+		lruHost, lruSeen, unset := "", time.Time{}, true
+		for name, ent := range p.entries {
+			if unset || ent.lastAccess.Before(lruSeen) {
+				lruHost, lruSeen, unset = name, ent.lastAccess, false
+			}
+		}
+		if lruHost != "" {
+			evicted = p.entries[lruHost].limiter
+			delete(p.entries, lruHost)
+			// Counted now; the actual Stop() happens after unlocking.
+			p.stoppedLimiters++
+		}
+	}
+
+	created := ratelimit.New(p.ctx, p.opts.MaxCount, p.opts.Duration)
+	p.entries[host] = &entry{lastAccess: now, limiter: created}
+	p.mu.Unlock()
+
+	if evicted != nil {
+		evicted.Stop()
+	}
+	return created
+}
+
+// sweepLoop runs until the pool context is cancelled, calling evictIdle once
+// per SweepInterval tick so limiters idle for longer than opts.Inactivity
+// are dropped. doneCh is closed on exit so Stop can wait for the loop.
+func (p *Pool) sweepLoop() {
+	defer close(p.doneCh)
+
+	sweep := time.NewTicker(p.opts.SweepInterval)
+	defer sweep.Stop()
+	cancelled := p.ctx.Done()
+	for {
+		select {
+		case <-sweep.C:
+			p.evictIdle(time.Now())
+		case <-cancelled:
+			return
+		}
+	}
+}
+
+// evictIdle drops every entry last accessed before now minus opts.Inactivity
+// and stops its limiter; the Stop calls happen after the lock is released.
+func (p *Pool) evictIdle(now time.Time) {
+	var expired []*ratelimit.Limiter
+	deadline := now.Add(-p.opts.Inactivity)
+
+	p.mu.Lock()
+	for host, ent := range p.entries {
+		if !ent.lastAccess.Before(deadline) {
+			continue
+		}
+		expired = append(expired, ent.limiter)
+		delete(p.entries, host)
+		p.stoppedLimiters++
+	}
+	p.mu.Unlock()
+	for _, limiter := range expired {
+		limiter.Stop()
+	}
+}
diff --git a/pkg/protocols/common/hostratelimit/hostratelimit_perf_test.go b/pkg/protocols/common/hostratelimit/hostratelimit_perf_test.go
new file mode 100644
index 0000000000..8ebe0da076
--- /dev/null
+++ b/pkg/protocols/common/hostratelimit/hostratelimit_perf_test.go
@@ -0,0 +1,336 @@
+package hostratelimit
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/projectdiscovery/ratelimit"
+	"github.com/stretchr/testify/require"
+)
+
+// startTimingServer starts a test server that counts each request it gets
+// and replies 200 "ok"; the returned counter is safe for concurrent reads.
+func startTimingServer(t testing.TB) (*httptest.Server, *atomic.Int64) {
+	t.Helper()
+	counter := new(atomic.Int64)
+	handler := func(w http.ResponseWriter, _ *http.Request) {
+		counter.Add(1)
+		w.WriteHeader(http.StatusOK)
+		_, _ = fmt.Fprint(w, "ok")
+	}
+	server := httptest.NewServer(http.HandlerFunc(handler))
+	t.Cleanup(server.Close)
+	return server, counter
+}
+
+func hostFromURL(t testing.TB, raw string) string {
+	t.Helper()
+	parsed, parseErr := url.Parse(raw)
+	require.NoError(t, parseErr)
+	return parsed.Host
+}
+
+func drainGet(t testing.TB, client *http.Client, u string) {
+	t.Helper()
+	response, getErr := client.Get(u)
+	require.NoError(t, getErr)
+	_, _ = io.Copy(io.Discard, response.Body)
+	_ = response.Body.Close()
+}
+
+// TestIntegration_PerHostBudgetIsEnforced verifies that, when several
+// concurrent goroutines per host hammer the same target at full speed, the
+// per-host limiter keeps each host's observed request rate within its
+// configured budget. The pool sits in front of a real HTTP client and the
+// assertion is made by counting server-side hits over a fixed window.
+//
+// Bounds are derived from the actual elapsed wall-clock so the test stays
+// stable under scheduler jitter, with +/-30% slack around the budgeted rate.
+// NOTE(review): drainGet calls require from worker goroutines; FailNow is
+// only documented as safe on the test goroutine - confirm this is intended.
+func TestIntegration_PerHostBudgetIsEnforced(t *testing.T) {
+	if testing.Short() {
+		t.Skip("integration test uses real time-based budgets")
+	}
+
+	const (
+		numHosts   = 4
+		workersPer = 8
+		budget     = 5 // tokens per duration per host
+		duration   = 100 * time.Millisecond
+		runFor     = 600 * time.Millisecond
+	)
+
+	servers := make([]*httptest.Server, numHosts)
+	hits := make([]*atomic.Int64, numHosts)
+	for i := 0; i < numHosts; i++ {
+		servers[i], hits[i] = startTimingServer(t)
+	}
+
+	pool := NewPool(context.Background(), Options{
+		MaxCount: budget,
+		Duration: duration,
+	})
+	defer pool.Stop()
+
+	client := &http.Client{Timeout: 5 * time.Second}
+
+	ctx, cancel := context.WithTimeout(context.Background(), runFor)
+	defer cancel()
+
+	var wg sync.WaitGroup
+	start := time.Now()
+	for _, srv := range servers {
+		host := hostFromURL(t, srv.URL)
+		for w := 0; w < workersPer; w++ {
+			wg.Add(1)
+			go func(host, url string) {
+				defer wg.Done()
+				for {
+					select {
+					case <-ctx.Done():
+						return
+					default:
+					}
+					pool.Take(host)
+					select {
+					case <-ctx.Done():
+						return
+					default:
+					}
+					drainGet(t, client, url)
+				}
+			}(host, srv.URL)
+		}
+	}
+	wg.Wait()
+	elapsed := time.Since(start)
+
+	budgetedRate := float64(budget) / duration.Seconds() // req/sec per host
+	const slack = 0.30
+	for i := 0; i < numHosts; i++ {
+		got := hits[i].Load()
+		observedRate := float64(got) / elapsed.Seconds()
+		t.Logf("host %d hits=%d (%.1f rps; budgeted %.1f rps)",
+			i, got, observedRate, budgetedRate)
+
+		require.LessOrEqualf(t, observedRate, budgetedRate*(1+slack),
+			"host %d over budget: %.1f rps > %.1f rps (budget+slack)",
+			i, observedRate, budgetedRate*(1+slack))
+		require.GreaterOrEqualf(t, observedRate, budgetedRate*(1-slack),
+			"host %d under-served (workers idle?): %.1f rps < %.1f rps (budget-slack)",
+			i, observedRate, budgetedRate*(1-slack))
+	}
+	t.Logf("ran for %v across %d hosts × %d workers (budget %d / %v per host = %.0f rps each)",
+		elapsed.Round(time.Millisecond), numHosts, workersPer, budget, duration, budgetedRate)
+}
+
+// TestPerformance_PerHostUnlocksParallelism is the speedup test. It calls
+// Take() on the limiters directly; no network requests are made.
+// Both scenarios target the same effective per-host rate B tokens / D and run
+// the same workload (numHosts * reqsPerHost). The difference is where the
+// budget lives:
+//
+//	"global-only": one shared limiter at B/D; goroutines for every host
+//	               serialize through the same bucket. Wall-clock floor:
+//	               numHosts * reqsPerHost / (B/D).
+//	"per-host":    one limiter per host at B/D; goroutines for different
+//	               hosts run in parallel up to numHosts × B/D total.
+//	               Wall-clock floor: reqsPerHost / (B/D).
+//
+// Workload is sized so the global-only run requires several refill windows
+// (otherwise both schemes finish under one tick and the test measures noise
+// instead of the rate-limiter behavior). Speedup ≈ numHosts. We assert it's
+// at least numHosts/2 to keep the test stable on busy CI without losing
+// the signal. Wall-clock numbers are always logged for visibility.
+func TestPerformance_PerHostUnlocksParallelism(t *testing.T) {
+	if testing.Short() {
+		t.Skip("speedup test uses real time-based budgets")
+	}
+
+	const (
+		numHosts    = 8
+		reqsPerHost = 20
+		budget      = 10 // tokens (= 100 rps per limiter at duration=100ms)
+		duration    = 100 * time.Millisecond
+	)
+
+	runGlobalOnly := func(t *testing.T) time.Duration {
+		global := ratelimit.New(context.Background(), budget, duration)
+		defer global.Stop()
+		var wg sync.WaitGroup
+		start := time.Now()
+		for h := 0; h < numHosts; h++ {
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				for r := 0; r < reqsPerHost; r++ {
+					global.Take()
+				}
+			}()
+		}
+		wg.Wait()
+		return time.Since(start)
+	}
+
+	runPerHost := func(t *testing.T) time.Duration {
+		pool := NewPool(context.Background(), Options{
+			MaxCount: budget,
+			Duration: duration,
+		})
+		defer pool.Stop()
+		var wg sync.WaitGroup
+		start := time.Now()
+		for h := 0; h < numHosts; h++ {
+			host := fmt.Sprintf("h-%d", h)
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				for r := 0; r < reqsPerHost; r++ {
+					pool.Take(host)
+				}
+			}()
+		}
+		wg.Wait()
+		return time.Since(start)
+	}
+
+	// Run each scheme once, unmeasured, so goroutines and timers have been
+	// scheduled at least once before the timed repeats below.
+	_ = runGlobalOnly(t)
+	_ = runPerHost(t)
+
+	const repeats = 3
+	var globalSum, perHostSum time.Duration
+	for i := 0; i < repeats; i++ {
+		globalSum += runGlobalOnly(t)
+		perHostSum += runPerHost(t)
+	}
+	globalAvg := globalSum / repeats
+	perHostAvg := perHostSum / repeats
+
+	speedup := float64(globalAvg) / float64(perHostAvg)
+	totalReqs := numHosts * reqsPerHost
+	t.Logf("workload: %d hosts × %d reqs (= %d total) at budget %d/%v per host",
+		numHosts, reqsPerHost, totalReqs, budget, duration)
+	t.Logf("global-only average: %v  (%.0f rps)",
+		globalAvg.Round(time.Millisecond),
+		float64(totalReqs)/globalAvg.Seconds())
+	t.Logf("per-host  average: %v  (%.0f rps)",
+		perHostAvg.Round(time.Millisecond),
+		float64(totalReqs)/perHostAvg.Seconds())
+	t.Logf("speedup: %.2fx (theoretical ceiling = %d)", speedup, numHosts)
+
+	// We expect at least half the theoretical N× speedup. Even on a busy
+	// machine this gap is huge; if it shrinks below half it almost
+	// certainly means the per-host pool is funneling through a single
+	// limiter and the regression is worth investigating.
+	require.GreaterOrEqual(t, speedup, float64(numHosts)/2,
+		"per-host limiter failed to unlock host parallelism: speedup=%.2fx", speedup)
+}
+
+// benchMaxCount is the bucket size used by the benchmarks. They are meant to
+// measure pure Take() code overhead, not the rate limiter's enforcement
+// sleeps. ratelimit.Limiter starts with `max` tokens then refills at
+// max/duration, so the bucket is sized well above b.N (Go benchmarks rarely
+// go past 1e8 iterations even at -benchtime=10s) and should not run dry.
+const benchMaxCount = 1 << 30 // ~1.07B tokens, never depleted in-bench.
+
+// BenchmarkPool_Take_HotHost times Take() on one already-cached host while
+// the bucket holds plenty of tokens, so nothing blocks. That is the cost
+// added to each request once per-host limiting is switched on.
+func BenchmarkPool_Take_HotHost(b *testing.B) {
+	const host = "hot.example.com"
+	opts := Options{MaxCount: benchMaxCount, Duration: time.Second}
+	pool := NewPool(context.Background(), opts)
+	defer pool.Stop()
+
+	// Create the entry up front so the loop only measures cache hits.
+	pool.Take(host)
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for remaining := b.N; remaining > 0; remaining-- {
+		pool.Take(host)
+	}
+}
+
+// BenchmarkPool_Take_ManyHosts times Take() while rotating through a large
+// set of pre-created hosts, so every call pays the map lookup and lastAccess
+// update. It models a template-spray scan hopping between many targets.
+func BenchmarkPool_Take_ManyHosts(b *testing.B) {
+	const numHosts = 1024
+	hosts := make([]string, 0, numHosts)
+	for idx := 0; idx < numHosts; idx++ {
+		hosts = append(hosts, fmt.Sprintf("h-%d.example.com", idx))
+	}
+
+	// MaxHosts is twice the host count so the LRU cap never triggers here.
+	pool := NewPool(context.Background(), Options{
+		MaxHosts: numHosts * 2,
+		Duration: time.Second,
+		MaxCount: benchMaxCount,
+	})
+	defer pool.Stop()
+	for _, name := range hosts {
+		pool.Take(name)
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	for iter := 0; iter < b.N; iter++ {
+		pool.Take(hosts[iter%numHosts])
+	}
+}
+
+// BenchmarkPool_Take_NilPool times Take() on a nil *Pool, i.e. the path used
+// when per-host limiting is off. It has to be effectively free so that
+// RateLimitTakeFor can stay in the hot path unconditionally.
+func BenchmarkPool_Take_NilPool(b *testing.B) {
+	const host = "any.example.com"
+	var disabled *Pool
+	b.ReportAllocs()
+	b.ResetTimer()
+	for remaining := b.N; remaining > 0; remaining-- {
+		disabled.Take(host)
+	}
+}
+
+// BenchmarkPool_Take_Parallel issues Take() for many hosts from several
+// goroutines at once, the shape of an SDK worker pool sharing one limiter
+// pool. MaxCount sits far above the call rate, so the bucket never blocks
+// and the figure reflects lock contention on the pool map.
+func BenchmarkPool_Take_Parallel(b *testing.B) {
+	const numHosts = 256
+	hosts := make([]string, 0, numHosts)
+	for idx := 0; idx < numHosts; idx++ {
+		hosts = append(hosts, fmt.Sprintf("h-%d.example.com", idx))
+	}
+
+	pool := NewPool(context.Background(), Options{
+		MaxHosts: numHosts * 2,
+		Duration: time.Second,
+		MaxCount: benchMaxCount,
+	})
+	defer pool.Stop()
+	for _, name := range hosts {
+		pool.Take(name)
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		// Each worker walks the host list with its own cursor.
+		for cursor := 0; pb.Next(); cursor++ {
+			pool.Take(hosts[cursor%numHosts])
+		}
+	})
+}
diff --git a/pkg/protocols/common/hostratelimit/hostratelimit_test.go b/pkg/protocols/common/hostratelimit/hostratelimit_test.go
new file mode 100644
index 0000000000..4803dc058c
--- /dev/null
+++ b/pkg/protocols/common/hostratelimit/hostratelimit_test.go
@@ -0,0 +1,238 @@
+package hostratelimit
+
+import (
+	"context"
+	"runtime"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/projectdiscovery/ratelimit"
+	"github.com/stretchr/testify/require"
+)
+
+// TestNewPool_DisabledReturnsNil checks that NewPool hands back nil whenever
+// MaxCount or Duration is left at zero; a nil *Pool is the disabled no-op.
+func TestNewPool_DisabledReturnsNil(t *testing.T) {
+	ctx := context.Background()
+	for _, opts := range []Options{{}, {MaxCount: 10}, {Duration: time.Second}} {
+		require.Nil(t, NewPool(ctx, opts))
+	}
+}
+
+// TestPool_NilIsNoOp calls Take, Get, Len and Stop on a nil *Pool and requires
+// that nothing panics, that Get yields nil and that Len reports 0.
+func TestPool_NilIsNoOp(t *testing.T) {
+	var disabled *Pool
+	exercise := func() {
+		disabled.Take("example.com")
+		require.Nil(t, disabled.Get("example.com"))
+		require.Equal(t, 0, disabled.Len())
+		disabled.Stop()
+	}
+	require.NotPanics(t, exercise)
+}
+
+// TestPool_SameHostReusesLimiter checks that two Get calls for one host hand
+// back the very same *ratelimit.Limiter and leave a single entry in the pool,
+// i.e. every request to that host draws from one shared bucket.
+func TestPool_SameHostReusesLimiter(t *testing.T) {
+	const host = "example.com"
+	pool := NewPool(context.Background(), Options{MaxCount: 10, Duration: time.Second})
+	defer pool.Stop()
+
+	first, second := pool.Get(host), pool.Get(host)
+	require.Same(t, first, second, "same host must reuse the cached limiter")
+	require.Equal(t, 1, pool.Len())
+}
+
+// TestPool_DifferentHostsAreIsolated checks that Get returns a separate,
+// non-nil limiter per host and that the pool then tracks two entries.
+func TestPool_DifferentHostsAreIsolated(t *testing.T) {
+	opts := Options{MaxCount: 10, Duration: time.Second}
+	pool := NewPool(context.Background(), opts)
+	defer pool.Stop()
+
+	limA, limB := pool.Get("a.example.com"), pool.Get("b.example.com")
+	require.NotNil(t, limA)
+	require.NotNil(t, limB)
+	require.NotSame(t, limA, limB, "different hosts must have different limiters")
+	require.Equal(t, 2, pool.Len())
+}
+
+// TestPool_EvictsIdleEntries verifies that the background sweep removes an
+// entry left untouched for longer than Options.Inactivity, and that the sweep
+// Stop()s the evicted limiter, which the test reads off p.stoppedLimiters.
+func TestPool_EvictsIdleEntries(t *testing.T) {
+	p := NewPool(context.Background(), Options{
+		MaxCount:      10,
+		Duration:      time.Second,
+		Inactivity:    50 * time.Millisecond,
+		SweepInterval: 25 * time.Millisecond,
+	})
+	defer p.Stop()
+
+	_ = p.Get("a.example.com") // only called to create the pool entry
+	require.Equal(t, 1, p.Len())
+
+	require.Eventually(t, func() bool { // poll every 10ms, give up after 1s
+		return p.Len() == 0
+	}, time.Second, 10*time.Millisecond, "idle entry should be evicted by the sweep")
+
+	p.mu.Lock() // stoppedLimiters is read while holding the pool mutex
+	stops := p.stoppedLimiters
+	p.mu.Unlock()
+	require.GreaterOrEqual(t, stops, uint64(1),
+		"sweep must Stop() the evicted limiter to release its goroutine")
+}
+
+// TestPool_LRUCapEvictsOldest verifies the hard cap: with MaxHosts=2 and the
+// pool full, inserting a third host evicts the least-recently-used entry
+// rather than growing the pool, which bounds memory on very large input sets.
+func TestPool_LRUCapEvictsOldest(t *testing.T) {
+	p := NewPool(context.Background(), Options{
+		MaxCount: 10,
+		Duration: time.Second,
+		MaxHosts: 2,
+	})
+	defer p.Stop()
+
+	_ = p.Get("a")
+	time.Sleep(2 * time.Millisecond) // keep access times distinct (assumes LRU order is timestamp-based)
+	_ = p.Get("b")
+	require.Equal(t, 2, p.Len())
+
+	// Touching "a" makes "b" the LRU; inserting "c" must evict "b", not "a".
+	time.Sleep(2 * time.Millisecond)
+	_ = p.Get("a")
+	time.Sleep(2 * time.Millisecond)
+	_ = p.Get("c")
+
+	require.Equal(t, 2, p.Len())
+	require.NotNil(t, p.peek("a"), "most recently touched key must survive") // peek, not Get: must not refresh the entry
+	require.NotNil(t, p.peek("c"), "newly inserted key must be present")
+	require.Nil(t, p.peek("b"), "LRU key must be evicted under cap")
+}
+
+// peek is a test-only lookup of host's limiter that leaves lastAccess untouched.
+func (p *Pool) peek(host string) *ratelimit.Limiter {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	entry, found := p.entries[host]
+	if !found {
+		return nil
+	}
+	return entry.limiter
+}
+
+// TestPool_TakeBlocksUntilToken verifies end-to-end behavior: with a tight
+// budget of 1 token per 50ms interval, the first Take uses up the budget and
+// the second Take on the same host has to wait for the next refill.
+func TestPool_TakeBlocksUntilToken(t *testing.T) {
+	p := NewPool(context.Background(), Options{MaxCount: 1, Duration: 50 * time.Millisecond})
+	defer p.Stop()
+
+	const host = "h"
+	p.Take(host) // consumes the only token of the current interval
+
+	start := time.Now()
+	p.Take(host)
+	elapsed := time.Since(start)
+
+	require.GreaterOrEqual(t, elapsed, 25*time.Millisecond, // half the interval; presumably slack for timer jitter
+		"second Take should wait for the bucket to refill (got %v)", elapsed)
+}
+
+// TestPool_StopDrainsAllLimiters populates the pool from N goroutines, calls
+// Stop and checks that the pool is empty and stoppedLimiters reached N.
+func TestPool_StopDrainsAllLimiters(t *testing.T) {
+	const N = 16
+	pool := NewPool(context.Background(), Options{MaxCount: 10, Duration: time.Second})
+	var pending sync.WaitGroup
+	pending.Add(N)
+	for idx := 0; idx < N; idx++ {
+		host := uniqueHost(idx)
+		go func() {
+			defer pending.Done()
+			pool.Take(host)
+		}()
+	}
+	pending.Wait()
+	require.Equal(t, N, pool.Len())
+
+	pool.Stop()
+
+	require.Equal(t, 0, pool.Len(), "Stop must drain all entries")
+	pool.mu.Lock()
+	stopped := pool.stoppedLimiters
+	pool.mu.Unlock()
+	require.GreaterOrEqual(t, stopped, uint64(N),
+		"every cached limiter must be Stop()-ed")
+
+	// A second Stop must not panic.
+	require.NotPanics(t, pool.Stop)
+}
+
+// TestPool_NoGoroutineLeak is the strict counterpart of the eviction test:
+// cycling through many short-lived hosts and stopping the pool must not leak
+// goroutines beyond a small fixed slack (2 here). This is the canary that
+// protects against future regressions where eviction silently forgets to Stop
+// the per-host limiter.
+func TestPool_NoGoroutineLeak(t *testing.T) {
+	runtime.GC()
+	time.Sleep(50 * time.Millisecond) // presumably lets goroutines from earlier tests wind down before sampling
+	before := runtime.NumGoroutine()
+
+	for round := 0; round < 3; round++ {
+		p := NewPool(context.Background(), Options{
+			MaxCount:      10,
+			Duration:      time.Second,
+			Inactivity:    20 * time.Millisecond,
+			SweepInterval: 10 * time.Millisecond,
+			MaxHosts:      8, // far below the 50 hosts taken next, so the cap is hit as well
+		})
+		for i := 0; i < 50; i++ {
+			p.Take(uniqueHost(i))
+		}
+		// Let the sweep run a few times so eviction-driven Stops fire,
+		// then drain the rest via Stop().
+		time.Sleep(80 * time.Millisecond)
+		p.Stop()
+	}
+
+	after := waitForGoroutineCount(before+2, 2000) // wait up to 2s for the count to settle
+	require.LessOrEqual(t, after, before+2,
+		"per-host limiter goroutines leaked: before=%d after=%d", before, after)
+}
+
+// uniqueHost maps index i to the host key "h-<i>". It is built with itoa
+// instead of fmt to keep unrelated noise out of the goroutine-leak test.
+func uniqueHost(i int) string {
+	const prefix = "h-"
+	return prefix + itoa(i)
+}
+
+// itoa renders a non-negative i in base 10; negative values yield "".
+func itoa(i int) string {
+	var digits [20]byte
+	start := len(digits)
+	if i == 0 {
+		return "0"
+	}
+	for n := i; n > 0; n /= 10 {
+		start--
+		digits[start] = byte('0' + n%10)
+	}
+	return string(digits[start:])
+}
+
+// waitForGoroutineCount polls every 50ms (up to maxWaitMs) for <= target goroutines.
+func waitForGoroutineCount(target, maxWaitMs int) int {
+	for elapsed := 0; elapsed < maxWaitMs; elapsed += 50 {
+		runtime.GC()
+		if live := runtime.NumGoroutine(); live <= target {
+			return live
+		}
+		time.Sleep(50 * time.Millisecond)
+	}
+	return runtime.NumGoroutine()
+}
diff --git a/pkg/protocols/dns/request.go b/pkg/protocols/dns/request.go
index 3cc0cd7156..3cb78a93e2 100644
--- a/pkg/protocols/dns/request.go
+++ b/pkg/protocols/dns/request.go
@@ -151,7 +151,7 @@ func (request *Request) execute(input *contextargs.Context, domain string, metad
 		}
 	}
 
-	request.options.RateLimitTake()
+	request.options.RateLimitTakeFor(domain)
 
 	// Send the request to the target servers
 	response, err := dnsClient.Do(compiledRequest)
diff --git a/pkg/protocols/http/request.go b/pkg/protocols/http/request.go
index f845bd8582..8189169fa5 100644
--- a/pkg/protocols/http/request.go
+++ b/pkg/protocols/http/request.go
@@ -71,6 +71,27 @@ func (request *Request) Type() templateTypes.ProtocolType {
 }
 
 // executeRaceRequest executes race condition request for a URL
+// rateLimitHostKey returns a stable per-host key for rate limiting derived
+// from the input target URL. Returns an empty string if the input cannot
+// be parsed; in that case the per-host limiter (if any) is skipped and
+// only the global rate limiter applies. The key uses URL.Host (host:port)
+// so different ports on the same hostname remain isolated buckets,
+// matching the per-host HTTP client pool keying.
+func rateLimitHostKey(input *contextargs.Context) string {
+	if input == nil || input.MetaInput == nil {
+		return ""
+	}
+	raw := input.MetaInput.Input
+	if raw == "" {
+		return ""
+	}
+	parsed, err := urlutil.ParseAbsoluteURL(raw, false)
+	if err != nil || parsed == nil {
+		return ""
+	}
+	return parsed.Host
+}
+
 func (request *Request) executeRaceRequest(input *contextargs.Context, previous output.InternalEvent, callback protocols.OutputEventCallback) error {
 	reqURL := input.MetaInput.Input
 	var generatedRequests []*generatedRequest
@@ -268,7 +289,7 @@ func (request *Request) executeParallelHTTP(input *contextargs.Context, dynamicV
 					spmHandler.Release()
 					continue
 				}
-				request.options.RateLimitTake()
+				request.options.RateLimitTakeFor(rateLimitHostKey(t.updatedInput))
 				hasInteractMatchers := interactsh.HasMatchers(request.CompiledOperators)
 				needsRequestEvent := hasInteractMatchers && request.NeedsRequestCondition()
 				select {
@@ -533,7 +554,7 @@ func (request *Request) ExecuteWithResults(input *contextargs.Context, dynamicVa
 		executeFunc := func(data string, payloads, dynamicValue map[string]interface{}) (bool, error) {
 			hasInteractMatchers := interactsh.HasMatchers(request.CompiledOperators)
 
-			request.options.RateLimitTake()
+			request.options.RateLimitTakeFor(rateLimitHostKey(input))
 
 			ctx := request.newContext(input)
 			ctxWithTimeout, cancel := context.WithTimeoutCause(ctx, request.options.Options.GetTimeouts().HttpTimeout, ErrHttpEngineRequestDeadline)
diff --git a/pkg/protocols/http/request_fuzz.go b/pkg/protocols/http/request_fuzz.go
index 3a7e2cc74a..3235434fcc 100644
--- a/pkg/protocols/http/request_fuzz.go
+++ b/pkg/protocols/http/request_fuzz.go
@@ -181,7 +181,7 @@ func (request *Request) executeGeneratedFuzzingRequest(gr fuzz.GeneratedRequest,
 	if request.options.HostErrorsCache != nil && request.options.HostErrorsCache.Check(request.options.ProtocolType.String(), input) {
 		return false
 	}
-	request.options.RateLimitTake()
+	request.options.RateLimitTakeFor(rateLimitHostKey(input))
 	req := &generatedRequest{
 		request:              gr.Request,
 		dynamicValues:        gr.DynamicValues,
diff --git a/pkg/protocols/protocols.go b/pkg/protocols/protocols.go
index ae5daa7970..2799b11e20 100644
--- a/pkg/protocols/protocols.go
+++ b/pkg/protocols/protocols.go
@@ -30,6 +30,7 @@ import (
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/globalmatchers"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hosterrorscache"
+	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/hostratelimit"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/interactsh"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/utils/excludematchers"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/variables"
@@ -88,6 +89,10 @@ type ExecutorOptions struct {
 	Progress progress.Progress
 	// RateLimiter is a rate-limiter for limiting sent number of requests.
 	RateLimiter *ratelimit.Limiter
+	// HostRateLimiter limits requests per target host. Optional: nil when
+	// per-host rate limiting is disabled (the default), in which case
+	// RateLimitTakeFor degenerates to RateLimitTake.
+	HostRateLimiter *hostratelimit.Pool
 	// Catalog is a template catalog implementation for nuclei
 	Catalog catalog.Catalog
 	// ProjectFile is the project file for nuclei
@@ -168,6 +173,22 @@ func (e *ExecutorOptions) RateLimitTake() {
 	}
 }
 
+// RateLimitTakeFor acquires one token from the global rate limiter and, if
+// configured, one token from the per-host limiter for host. The global
+// limiter is always taken first so a slow host cannot starve global capacity
+// budgeting.
+//
+// Pass an empty host when no host scope is available (e.g. self-contained
+// templates). In that case behavior matches RateLimitTake.
+func (e *ExecutorOptions) RateLimitTakeFor(host string) {
+	if e.RateLimiter != nil {
+		e.RateLimiter.Take()
+	}
+	if host != "" {
+		e.HostRateLimiter.Take(host)
+	}
+}
+
 // GetThreadsForNPayloadRequests returns the number of threads to use as default for
 // given max-request of payloads
 func (e *ExecutorOptions) GetThreadsForNPayloadRequests(totalRequests int, currentThreads int) int {
@@ -284,6 +305,7 @@ func (e *ExecutorOptions) Copy() *ExecutorOptions {
 		IssuesClient:        e.IssuesClient,
 		Progress:            e.Progress,
 		RateLimiter:         e.RateLimiter,
+		HostRateLimiter:     e.HostRateLimiter,
 		Catalog:             e.Catalog,
 		ProjectFile:         e.ProjectFile,
 		Browser:             e.Browser,
@@ -465,6 +487,7 @@ func (e *ExecutorOptions) ApplyNewEngineOptions(n *ExecutorOptions) {
 	e.IssuesClient = n.IssuesClient
 	e.Progress = n.Progress
 	e.RateLimiter = n.RateLimiter
+	e.HostRateLimiter = n.HostRateLimiter
 	e.Catalog = n.Catalog
 	e.ProjectFile = n.ProjectFile
 	e.Browser = n.Browser
diff --git a/pkg/types/types.go b/pkg/types/types.go
index 5ddf542ecc..eedb8fde23 100644
--- a/pkg/types/types.go
+++ b/pkg/types/types.go
@@ -153,6 +153,11 @@ type Options struct {
 	// Rate-Limit is the maximum number of requests per minute for specified target
 	// Deprecated: Use RateLimitDuration - automatically set Rate Limit Duration to 60 seconds
 	RateLimitMinute int
+	// RateLimitHost is the maximum number of requests per RateLimitHostDuration
+	// per host. 0 disables the per-host limiter (only the global RateLimit applies).
+	RateLimitHost int
+	// RateLimitHostDuration is the refill interval for the per-host bucket.
+	RateLimitHostDuration time.Duration
 	// PageTimeout is the maximum time to wait for a page in seconds
 	PageTimeout int
 	// InteractionsCacheSize is the number of interaction-url->req to keep in cache at a time.
@@ -546,6 +551,8 @@ func (options *Options) Copy() *Options {
 		RateLimit:                      options.RateLimit,
 		RateLimitDuration:              options.RateLimitDuration,
 		RateLimitMinute:                options.RateLimitMinute,
+		RateLimitHost:                  options.RateLimitHost,
+		RateLimitHostDuration:          options.RateLimitHostDuration,
 		PageTimeout:                    options.PageTimeout,
 		InteractionsCacheSize:          options.InteractionsCacheSize,
 		InteractionsPollDuration:       options.InteractionsPollDuration,
@@ -801,6 +808,7 @@ func DefaultOptions() *Options {
 	return &Options{
 		RateLimit:                  150,
 		RateLimitDuration:          time.Second,
+		RateLimitHostDuration:      time.Second,
 		BulkSize:                   25,
 		TemplateThreads:            25,
 		HeadlessBulkSize:           10,

From 09b61208b6b5f546f4c3d008b37671a958d27c40 Mon Sep 17 00:00:00 2001
From: Mzack9999 <mzack9999@protonmail.com>
Date: Mon, 27 Apr 2026 11:31:18 +0800
Subject: [PATCH 2/3] updating docs

---
 README.md       | 22 ++++++++++++----------
 README_CN.md    |  2 ++
 README_ES.md    |  2 ++
 README_ID.md    |  2 ++
 README_KR.md    |  2 ++
 README_PT-BR.md |  2 ++
 6 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index fd0e41c911..c22359579d 100644
--- a/README.md
+++ b/README.md
@@ -275,16 +275,18 @@ UNCOVER:
    -ur, -uncover-ratelimit int    override ratelimit of engines with unknown ratelimit (default 60 req/min) (default 60)
 
 RATE-LIMIT:
-   -rl, -rate-limit int               maximum number of requests to send per second (default 150)
-   -rld, -rate-limit-duration value   maximum number of requests to send per second (default 1s)
-   -rlm, -rate-limit-minute int       maximum number of requests to send per minute (DEPRECATED)
-   -bs, -bulk-size int                maximum number of hosts to be analyzed in parallel per template (default 25)
-   -c, -concurrency int               maximum number of templates to be executed in parallel (default 25)
-   -hbs, -headless-bulk-size int      maximum number of headless hosts to be analyzed in parallel per template (default 10)
-   -headc, -headless-concurrency int  maximum number of headless templates to be executed in parallel (default 10)
-   -jsc, -js-concurrency int          maximum number of javascript runtimes to be executed in parallel (default 120)
-   -pc, -payload-concurrency int      max payload concurrency for each template (default 25)
-   -prc, -probe-concurrency int       http probe concurrency with httpx (default 50)
+   -rl, -rate-limit int                     maximum number of requests to send per second (default 150)
+   -rld, -rate-limit-duration value         maximum number of requests to send per second (default 1s)
+   -rlm, -rate-limit-minute int             maximum number of requests to send per minute (DEPRECATED)
+   -rlh, -rate-limit-host int               maximum number of requests to send per host per rate-limit-host-duration (0 = disabled)
+   -rlhd, -rate-limit-host-duration value   refill interval for the per-host rate limit bucket (default 1s)
+   -bs, -bulk-size int                      maximum number of hosts to be analyzed in parallel per template (default 25)
+   -c, -concurrency int                     maximum number of templates to be executed in parallel (default 25)
+   -hbs, -headless-bulk-size int            maximum number of headless hosts to be analyzed in parallel per template (default 10)
+   -headc, -headless-concurrency int        maximum number of headless templates to be executed in parallel (default 10)
+   -jsc, -js-concurrency int                maximum number of javascript runtimes to be executed in parallel (default 120)
+   -pc, -payload-concurrency int            max payload concurrency for each template (default 25)
+   -prc, -probe-concurrency int             http probe concurrency with httpx (default 50)
    -tlc, -template-loading-concurrency int  maximum number of concurrent template loading operations (default 50)
 
 OPTIMIZATIONS:
diff --git a/README_CN.md b/README_CN.md
index 22d013618f..08ec71348c 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -234,6 +234,8 @@ UNCOVER引擎:
 限速：
    -rl, -rate-limit int                  每秒最大请求量（默认：150）
    -rlm, -rate-limit-minute int          每分钟最大请求量
+   -rlh, -rate-limit-host int            每个主机每 rate-limit-host-duration 时间窗口内的最大请求数（0 = 禁用）
+   -rlhd, -rate-limit-host-duration value  每主机限速桶的填充间隔（默认：1秒）
    -bs, -bulk-size int                   每个模板最大并行检测数（默认：25）
    -c, -concurrency int                  并行执行的最大模板数量（默认：25）
    -hbs, -headless-bulk-size int         每个模板并行运行的无头主机最大数量（默认：10）
diff --git a/README_ES.md b/README_ES.md
index 3959ee452c..88c38ec8ad 100644
--- a/README_ES.md
+++ b/README_ES.md
@@ -238,6 +238,8 @@ UNCOVER:
 RATE-LIMIT:
    -rl, -rate-limit int               número máximo de peticiones a enviar por segundo (por defecto 150)
    -rlm, -rate-limit-minute int       número máximo de peticiones a enviar por minuto
+   -rlh, -rate-limit-host int         número máximo de peticiones por host por rate-limit-host-duration (0 = desactivado)
+   -rlhd, -rate-limit-host-duration value  intervalo de recarga del bucket de rate-limit por host (por defecto 1s)
    -bs, -bulk-size int                número máximo de hosts a ser analizados en paralelo por plantilla (por defecto 25)
    -c, -concurrency int               número máximo de plantillas a ejecutar en paralelo (por defecto 25)
    -hbs, -headless-bulk-size int      número máximo de hosts headless a ser analizados en paralelo por plantilla (por defecto 10)
diff --git a/README_ID.md b/README_ID.md
index 848a170672..c97ab8cc8c 100644
--- a/README_ID.md
+++ b/README_ID.md
@@ -207,6 +207,8 @@ UNCOVER:
 RATE-LIMIT:
    -rl, -rate-limit int               maximum number of requests to send per second (default 150)
    -rlm, -rate-limit-minute int       maximum number of requests to send per minute
+   -rlh, -rate-limit-host int         maximum number of requests to send per host per rate-limit-host-duration (0 = disabled)
+   -rlhd, -rate-limit-host-duration value  refill interval for the per-host rate limit bucket (default 1s)
    -bs, -bulk-size int                maximum number of hosts to be analyzed in parallel per template (default 25)
    -c, -concurrency int               maximum number of templates to be executed in parallel (default 25)
    -hbs, -headless-bulk-size int      maximum number of headless hosts to be analyzed in parallel per template (default 10)
diff --git a/README_KR.md b/README_KR.md
index 6316cf1a82..65a3f78cac 100644
--- a/README_KR.md
+++ b/README_KR.md
@@ -205,6 +205,8 @@ UNCOVER:
 RATE-LIMIT:
    -rl, -rate-limit int               초당 보낼 최대 요청 수 (기본값 150)
    -rlm, -rate-limit-minute int       분당 보낼 최대 요청 수
+   -rlh, -rate-limit-host int         호스트당 rate-limit-host-duration 동안 보낼 최대 요청 수 (0 = 비활성)
+   -rlhd, -rate-limit-host-duration value  호스트별 rate-limit 버킷의 리필 간격 (기본값 1s)
    -bs, -bulk-size int                템플릿당 병렬로 분석할 최대 호스트 수 (기본값 25)
    -c, -concurrency int               병렬로 실행할 최대 템플릿 수 (기본값 25)
    -hbs, -headless-bulk-size int      템플릿당 병렬로 분석할 최대 headless 호스트 수 (기본값 10)
diff --git a/README_PT-BR.md b/README_PT-BR.md
index 4d8650394e..e230be32f6 100644
--- a/README_PT-BR.md
+++ b/README_PT-BR.md
@@ -238,6 +238,8 @@ UNCOVER:
 RATE-LIMIT:
    -rl, -rate-limit int               número máximo de solicitações a serem enviadas por segundo (padrão 150)
    -rlm, -rate-limit-minute int       número máximo de solicitações a serem enviadas por minuto
+   -rlh, -rate-limit-host int         número máximo de solicitações por host por rate-limit-host-duration (0 = desativado)
+   -rlhd, -rate-limit-host-duration value  intervalo de recarga do bucket de rate-limit por host (padrão 1s)
    -bs, -bulk-size int                número máximo de hosts a serem analisados em paralelo por template (padrão 25)
    -c, -concurrency int               número máximo de templates a serem executados em paralelo (padrão 25)
    -hbs, -headless-bulk-size int      número máximo de hosts headless a serem analisados em paralelo por template (padrão 10)

From 7a26399fe9a96e09034662de5b384c78c7b30dea Mon Sep 17 00:00:00 2001
From: Mzack9999 <mzack9999@protonmail.com>
Date: Tue, 28 Apr 2026 03:22:20 +0800
Subject: [PATCH 3/3] rlh takes priority over global rl

---
 README.md                  |  4 ++--
 cmd/nuclei/main.go         |  4 ++--
 lib/config.go              | 20 +++++++++++++-------
 pkg/protocols/protocols.go | 25 +++++++++++++++++--------
 4 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index c22359579d..96aaae8062 100644
--- a/README.md
+++ b/README.md
@@ -275,10 +275,10 @@ UNCOVER:
    -ur, -uncover-ratelimit int    override ratelimit of engines with unknown ratelimit (default 60 req/min) (default 60)
 
 RATE-LIMIT:
-   -rl, -rate-limit int                     maximum number of requests to send per second (default 150)
+   -rl, -rate-limit int                     maximum number of requests to send per second (ignored when -rate-limit-host is set) (default 150)
    -rld, -rate-limit-duration value         maximum number of requests to send per second (default 1s)
    -rlm, -rate-limit-minute int             maximum number of requests to send per minute (DEPRECATED)
-   -rlh, -rate-limit-host int               maximum number of requests to send per host per rate-limit-host-duration (0 = disabled)
+   -rlh, -rate-limit-host int               maximum number of requests to send per host per rate-limit-host-duration (0 = disabled, takes priority over -rate-limit)
    -rlhd, -rate-limit-host-duration value   refill interval for the per-host rate limit bucket (default 1s)
    -bs, -bulk-size int                      maximum number of hosts to be analyzed in parallel per template (default 25)
    -c, -concurrency int                     maximum number of templates to be executed in parallel (default 25)
diff --git a/cmd/nuclei/main.go b/cmd/nuclei/main.go
index e340b4bab7..d1f92f8e70 100644
--- a/cmd/nuclei/main.go
+++ b/cmd/nuclei/main.go
@@ -416,10 +416,10 @@ on extensive configurability, massive extensibility and ease of use.`)
 	)
 
 	flagSet.CreateGroup("rate-limit", "Rate-Limit",
-		flagSet.IntVarP(&options.RateLimit, "rate-limit", "rl", 150, "maximum number of requests to send per second"),
+		flagSet.IntVarP(&options.RateLimit, "rate-limit", "rl", 150, "maximum number of requests to send per second (ignored when -rate-limit-host is set)"),
 		flagSet.DurationVarP(&options.RateLimitDuration, "rate-limit-duration", "rld", time.Second, "maximum number of requests to send per second"),
 		flagSet.IntVarP(&options.RateLimitMinute, "rate-limit-minute", "rlm", 0, "maximum number of requests to send per minute (DEPRECATED)"),
-		flagSet.IntVarP(&options.RateLimitHost, "rate-limit-host", "rlh", 0, "maximum number of requests to send per host per rate-limit-host-duration (0 = disabled)"),
+		flagSet.IntVarP(&options.RateLimitHost, "rate-limit-host", "rlh", 0, "maximum number of requests to send per host per rate-limit-host-duration (0 = disabled, takes priority over -rate-limit)"),
 		flagSet.DurationVarP(&options.RateLimitHostDuration, "rate-limit-host-duration", "rlhd", time.Second, "refill interval for the per-host rate limit bucket"),
 		flagSet.IntVarP(&options.BulkSize, "bulk-size", "bs", 25, "maximum number of hosts to be analyzed in parallel per template"),
 		flagSet.IntVarP(&options.TemplateThreads, "concurrency", "c", 25, "maximum number of templates to be executed in parallel"),
diff --git a/lib/config.go b/lib/config.go
index 2885459d75..0eb9624391 100644
--- a/lib/config.go
+++ b/lib/config.go
@@ -187,14 +187,20 @@ func WithGlobalRateLimitCtx(ctx context.Context, maxTokens int, duration time.Du
 	}
 }
 
-// WithHostRateLimit sets a per-host rate limit, in addition to the global
-// rate limit. Each unique target host will be capped at maxTokens requests
-// per duration. Pass maxTokens=0 to disable per-host limiting (the default).
+// WithHostRateLimit configures a per-host rate limit. Each unique target
+// host is capped at maxTokens requests per duration. Pass maxTokens=0 to
+// disable per-host limiting (the default).
 //
-// The per-host limiter complements WithGlobalRateLimit: a request acquires a
-// token from the global limiter first, then from the per-host limiter. This
-// means the global limit still bounds total scan throughput while the
-// per-host limit prevents any single target from being overwhelmed.
+// The per-host limiter takes priority over the global rate limit
+// (WithGlobalRateLimitCtx): when this option is in effect, requests carrying
+// a host scope consult only the per-host bucket, and the global limiter is
+// bypassed. This is intentional — the global rate limit defaults to a
+// non-zero value, so layering both would silently throttle aggregate
+// throughput and defeat the point of opting into a per-host budget.
+// Aggregate scan throughput is naturally bounded by num_hosts * maxTokens.
+//
+// Requests without a host scope (rare, e.g. self-contained templates) fall
+// back to the global limiter so they remain paced.
 func WithHostRateLimit(ctx context.Context, maxTokens int, duration time.Duration) NucleiSDKOptions {
 	return func(e *NucleiEngine) error {
 		e.opts.RateLimitHost = maxTokens
diff --git a/pkg/protocols/protocols.go b/pkg/protocols/protocols.go
index 2799b11e20..3598e12cb7 100644
--- a/pkg/protocols/protocols.go
+++ b/pkg/protocols/protocols.go
@@ -173,20 +173,29 @@ func (e *ExecutorOptions) RateLimitTake() {
 	}
 }
 
-// RateLimitTakeFor acquires one token from the global rate limiter and, if
-// configured, one token from the per-host limiter for host. The global
-// limiter is always taken first so a slow host cannot starve global capacity
-// budgeting.
+// RateLimitTakeFor acquires one token from the appropriate rate limiter for
+// host. When a per-host limiter is configured (i.e. -rate-limit-host > 0)
+// it takes priority over the global limiter and is the only limiter
+// consulted; the global -rate-limit is bypassed.
+//
+// This priority is intentional: -rate-limit defaults to a non-zero value
+// (150 rps), so applying both limiters would silently cap aggregate
+// throughput at the global default and defeat the purpose of opting into
+// a per-host budget. Note that no global ceiling is enforced for host-scoped
+// requests in this mode: aggregate throughput is bounded only by
+// num_hosts * rate-limit-host.
 //
 // Pass an empty host when no host scope is available (e.g. self-contained
-// templates). In that case behavior matches RateLimitTake.
+// templates); the call falls back to the global limiter so those requests
+// are still paced.
 func (e *ExecutorOptions) RateLimitTakeFor(host string) {
+	if e.HostRateLimiter != nil && host != "" {
+		e.HostRateLimiter.Take(host)
+		return // the per-host budget replaces the global one for this request
+	}
 	if e.RateLimiter != nil {
 		e.RateLimiter.Take()
 	}
-	if host != "" {
-		e.HostRateLimiter.Take(host)
-	}
 }
 
 // GetThreadsForNPayloadRequests returns the number of threads to use as default for