Documentation ¶
Index ¶
- Variables
- func AvailabilityError() error
- func ExtractNumericFields(data []byte) (map[string]float64, error)
- func HasOpenZL() bool
- func IsAvailable() bool
- func NewFactory() processor.Factory
- func ValidateConfig(cfg DetectorConfig) error
- func ValidateLibrary() error
- type Config
- type Detector
- type DetectorConfig
- type EnhancedMetrics
- type KafkaConfig
- type Metrics
- type NumericFieldExtractor
- type NumericStats
- type NumericStatsRegistry
- type NumericStatsSnapshot
- type RedisConfig
- type Tokenizer
- type TokenizerConfig
- type TokenizerStats
- type ValueOutlier
Constants ¶
This section is empty.
Variables ¶
var ErrCBADPanic = errors.New("CBAD internal panic - Rust panic caught at FFI boundary")
ErrCBADPanic indicates a Rust panic was caught in the CBAD library. This is a critical error indicating a bug in the Rust code.
Functions ¶
func AvailabilityError ¶
func AvailabilityError() error
AvailabilityError returns the last validation error, if any.
func ExtractNumericFields ¶
func ExtractNumericFields(data []byte) (map[string]float64, error)
ExtractNumericFields recursively extracts all numeric fields from JSON. Returns a map of field paths (dot-notation) to values. Example: {"user": {"age": 25}} -> {"user.age": 25}
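The dot-notation walk described above can be sketched as a small recursive function; this is an illustrative re-implementation, not the package's exact code (arrays are omitted for brevity):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// extractNumeric is an illustrative stand-in for ExtractNumericFields.
func extractNumeric(data []byte) (map[string]float64, error) {
	var root interface{}
	if err := json.Unmarshal(data, &root); err != nil {
		return nil, err
	}
	out := make(map[string]float64)
	walk("", root, out)
	return out, nil
}

// walk descends nested objects, joining keys with "." to build field paths.
func walk(path string, v interface{}, out map[string]float64) {
	switch t := v.(type) {
	case float64: // encoding/json decodes every JSON number as float64
		out[path] = t
	case map[string]interface{}:
		for k, child := range t {
			p := k
			if path != "" {
				p = path + "." + k
			}
			walk(p, child, out)
		}
	}
}

func main() {
	fields, _ := extractNumeric([]byte(`{"user": {"age": 25}}`))
	fmt.Println(fields["user.age"]) // 25
}
```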
func HasOpenZL ¶
func HasOpenZL() bool
HasOpenZL returns true when the linked Rust library was built with the optional OpenZL feature.
func IsAvailable ¶
func IsAvailable() bool
IsAvailable reports whether the CBAD Rust core is reachable via CGO.
func NewFactory ¶
func NewFactory() processor.Factory
NewFactory creates a factory for the driftlock CBAD processor.
func ValidateConfig ¶
func ValidateConfig(cfg DetectorConfig) error
ValidateConfig reports whether the detector configuration is valid.
func ValidateLibrary ¶
func ValidateLibrary() error
ValidateLibrary reports whether the CBAD library is properly loaded and functional.
Types ¶
type Config ¶
type Config struct {
// Embed the component.Config interface to satisfy the component configuration contract
component.Config `mapstructure:",squash"`
WindowSize int `mapstructure:"window_size"`
HopSize int `mapstructure:"hop_size"`
Threshold float64 `mapstructure:"threshold"`
Determinism bool `mapstructure:"determinism"`
// Kafka configuration for publishing OTLP events
Kafka KafkaConfig `mapstructure:"kafka"`
// Redis configuration for distributed state management
Redis RedisConfig `mapstructure:"redis"`
}
Config defines the configuration for the driftlock CBAD processor
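The mapstructure tags above imply a collector-style YAML configuration. A hedged sketch follows; the field names come from the tags, but the processor's registered key (`driftlock_cbad` below) is an assumption, and the values are placeholders:

```yaml
processors:
  # Key name assumed; check the type returned by NewFactory for the real one.
  driftlock_cbad:
    window_size: 256
    hop_size: 64
    threshold: 0.5
    determinism: true
    kafka:
      enabled: true
      brokers: ["kafka:9092"]
      events_topic: "driftlock.events"
    redis:
      enabled: false
```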
type Detector ¶
type Detector struct {
// contains filtered or unexported fields
}
Detector represents a streaming anomaly detector
func NewDetector ¶
func NewDetector(config DetectorConfig) (*Detector, error)
NewDetector creates a new anomaly detector with the given configuration
func (*Detector) Close ¶
func (d *Detector) Close()
Close destroys the detector and frees its memory
func (*Detector) DetectAnomaly ¶
func (d *Detector) DetectAnomaly() (bool, *EnhancedMetrics, error)
DetectAnomaly performs anomaly detection and returns results
type DetectorConfig ¶
type DetectorConfig struct {
BaselineSize int
WindowSize int
HopSize int
MaxCapacity int
PValueThreshold float64
NCDThreshold float64
PermutationCount int
Seed uint64
CompressionRatioDropThreshold float64
EntropyChangeThreshold float64
CompositeThreshold float64
RequireStatisticalSignificance bool
CompressionAlgorithm string
}
DetectorConfig represents configuration for the anomaly detector.
func DefaultDemoConfig ¶
func DefaultDemoConfig() DetectorConfig
DefaultDemoConfig returns a configuration optimized for demo use.
func DefaultProductionConfig ¶
func DefaultProductionConfig() DetectorConfig
DefaultProductionConfig returns a production-ready configuration
type EnhancedMetrics ¶
type EnhancedMetrics struct {
NCD float64
PValue float64
BaselineCompressionRatio float64
WindowCompressionRatio float64
BaselineEntropy float64
WindowEntropy float64
IsAnomaly bool
ConfidenceLevel float64
IsStatisticallySignificant bool
CompressionRatioChange float64
EntropyChange float64
Explanation string
RecommendedNCDThreshold float64
RecommendedWindowSize int
DataStabilityScore float64
}
EnhancedMetrics represents comprehensive anomaly detection results.
func (*EnhancedMetrics) GetAnomalyExplanation ¶
func (m *EnhancedMetrics) GetAnomalyExplanation() string
GetAnomalyExplanation generates a human-readable explanation of the anomaly detection result
func (*EnhancedMetrics) GetDetailedExplanation ¶
func (m *EnhancedMetrics) GetDetailedExplanation() string
GetDetailedExplanation provides a comprehensive explanation with statistical significance
type KafkaConfig ¶
type KafkaConfig struct {
Enabled bool `mapstructure:"enabled"`
Brokers []string `mapstructure:"brokers"`
ClientID string `mapstructure:"client_id"`
EventsTopic string `mapstructure:"events_topic"`
TLSEnabled bool `mapstructure:"tls_enabled"`
BatchSize int `mapstructure:"batch_size"`
BatchTimeoutMs int `mapstructure:"batch_timeout_ms"`
}
KafkaConfig holds Apache Kafka settings for the processor
type Metrics ¶
type Metrics struct {
NCD float64
PValue float64
BaselineCompressionRatio float64
WindowCompressionRatio float64
BaselineEntropy float64
WindowEntropy float64
IsAnomaly bool
ConfidenceLevel float64
}
Metrics represents the anomaly detection results from CBAD.
func ComputeMetrics ¶
func ComputeMetrics(baseline []byte, window []byte, seed uint64, permutations int) (*Metrics, error)
ComputeMetrics calculates CBAD anomaly detection metrics using compression-based analysis
func ComputeMetricsQuick ¶
ComputeMetricsQuick calculates CBAD metrics with default configuration
func (*Metrics) GetAnomalyExplanation ¶
func (m *Metrics) GetAnomalyExplanation() string
GetAnomalyExplanation generates a human-readable explanation of the anomaly detection result.
func (*Metrics) GetConfidenceLevel ¶
func (m *Metrics) GetConfidenceLevel() float64
GetConfidenceLevel returns the confidence level as a percentage (0-100).
func (*Metrics) GetDetailedExplanation ¶
func (m *Metrics) GetDetailedExplanation() string
GetDetailedExplanation provides a comprehensive explanation with statistical significance.
func (*Metrics) IsStatisticallySignificant ¶
func (m *Metrics) IsStatisticallySignificant() bool
IsStatisticallySignificant returns true if the anomaly is statistically significant (p < 0.05).
type NumericFieldExtractor ¶
type NumericFieldExtractor struct {
// IncludePatterns: if non-empty, only fields matching these patterns are tracked
IncludePatterns []string
// ExcludePatterns: fields matching these patterns are ignored
ExcludePatterns []string
}
NumericFieldExtractor extracts numeric values from JSON with their paths
type NumericStats ¶
type NumericStats struct {
N int64 // Count of values seen
Mean float64 // Running mean
M2 float64 // Sum of squared differences from mean (for variance)
Min float64 // Minimum value seen
Max float64 // Maximum value seen
// contains filtered or unexported fields
}
NumericStats tracks running statistics using Welford's online algorithm. This allows single-pass computation of mean and variance without storing all values. Memory efficient: O(1) per field path regardless of event count.
func NewNumericStats ¶
func NewNumericStats() *NumericStats
NewNumericStats creates a new stats tracker
func (*NumericStats) IsOutlier ¶
func (s *NumericStats) IsOutlier(x float64, k float64) bool
IsOutlier reports whether a value is more than k standard deviations from the mean. Common values for k: 2.0 (95%), 3.0 (99.7%), 4.0 (99.99%). Returns false if there is insufficient data (N < 30) for statistical significance.
func (*NumericStats) Snapshot ¶
func (s *NumericStats) Snapshot() NumericStatsSnapshot
Snapshot returns a copy of the current statistics
func (*NumericStats) StdDev ¶
func (s *NumericStats) StdDev() float64
StdDev returns the sample standard deviation.
func (*NumericStats) Update ¶
func (s *NumericStats) Update(x float64)
Update incorporates a new value using Welford's algorithm. This is numerically stable for computing variance in a single pass.
func (*NumericStats) Variance ¶
func (s *NumericStats) Variance() float64
Variance returns the sample variance (N-1 denominator). Returns 0 if fewer than 2 samples.
func (*NumericStats) ZScore ¶
func (s *NumericStats) ZScore(x float64) float64
ZScore calculates how many standard deviations x is from the mean. Returns 0 if insufficient data.
type NumericStatsRegistry ¶
type NumericStatsRegistry struct {
// contains filtered or unexported fields
}
NumericStatsRegistry manages stats for multiple streams and field paths. Thread-safe for concurrent access.
func NewNumericStatsRegistry ¶
func NewNumericStatsRegistry() *NumericStatsRegistry
NewNumericStatsRegistry creates a new registry
func (*NumericStatsRegistry) Clear ¶
func (r *NumericStatsRegistry) Clear(streamID string)
Clear removes all stats for a stream
func (*NumericStatsRegistry) GetOrCreate ¶
func (r *NumericStatsRegistry) GetOrCreate(streamID, fieldPath string) *NumericStats
GetOrCreate returns stats for a stream/field, creating if needed
func (*NumericStatsRegistry) GetStreamStats ¶
func (r *NumericStatsRegistry) GetStreamStats(streamID string) map[string]NumericStatsSnapshot
GetStreamStats returns all stats for a stream
type NumericStatsSnapshot ¶
type NumericStatsSnapshot struct {
N int64 `json:"n"`
Mean float64 `json:"mean"`
StdDev float64 `json:"std_dev"`
Min float64 `json:"min"`
Max float64 `json:"max"`
}
NumericStatsSnapshot is a point-in-time copy of statistics
type RedisConfig ¶
type RedisConfig struct {
Enabled bool `mapstructure:"enabled"`
Addr string `mapstructure:"addr"`
Password string `mapstructure:"password"`
DB int `mapstructure:"db"`
Prefix string `mapstructure:"prefix"`
}
RedisConfig holds Redis settings for distributed state management
type Tokenizer ¶
type Tokenizer struct {
// contains filtered or unexported fields
}
Tokenizer preprocesses data to replace high-entropy fields with tokens
func GetTokenizer ¶
func GetTokenizer(cfg TokenizerConfig) *Tokenizer
GetTokenizer returns a shared tokenizer for the given configuration. This avoids repeated regex compilation overhead.
func NewTokenizer ¶
func NewTokenizer(cfg TokenizerConfig) *Tokenizer
NewTokenizer creates a tokenizer with the given configuration
func (*Tokenizer) Config ¶
func (t *Tokenizer) Config() TokenizerConfig
Config returns the current tokenizer configuration
func (*Tokenizer) ResetStats ¶
func (t *Tokenizer) ResetStats()
ResetStats resets all statistics to zero
func (*Tokenizer) Stats ¶
func (t *Tokenizer) Stats() TokenizerStats
Stats returns a copy of the current tokenization statistics
func (*Tokenizer) Tokenize ¶
func (t *Tokenizer) Tokenize(data []byte) []byte
Tokenize replaces high-entropy fields with tokens. Returns the tokenized data as a new allocation; the original is unchanged.
func (*Tokenizer) UpdateConfig ¶
func (t *Tokenizer) UpdateConfig(cfg TokenizerConfig)
UpdateConfig updates the tokenizer configuration
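The tokenizer's job is to collapse high-entropy identifiers into fixed tokens so that log lines differing only in IDs look identical to the compressor. A sketch for the UUID case only; the package likely uses similar precompiled patterns for hashes, Base64, and JWTs, but this regex is an illustrative assumption:

```go
package main

import (
	"fmt"
	"regexp"
)

// uuidRe matches the canonical 8-4-4-4-12 hex UUID form.
var uuidRe = regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)

// tokenizeUUIDs replaces every UUID with the fixed token <UUID>.
// regexp.ReplaceAll returns a new slice, leaving the input unchanged.
func tokenizeUUIDs(data []byte) []byte {
	return uuidRe.ReplaceAll(data, []byte("<UUID>"))
}

func main() {
	in := []byte(`{"request_id":"550e8400-e29b-41d4-a716-446655440000","status":200}`)
	fmt.Println(string(tokenizeUUIDs(in)))
	// → {"request_id":"<UUID>","status":200}
}
```

Precompiling the patterns once (as GetTokenizer's doc notes) matters because regexp.MustCompile is far more expensive than a match, and the tokenizer runs on every event.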
type TokenizerConfig ¶
type TokenizerConfig struct {
EnableUUID bool `json:"enable_uuid"` // Replace UUIDs with <UUID>
EnableHash bool `json:"enable_hash"` // Replace hex hashes with <HASH>
EnableBase64 bool `json:"enable_base64"` // Replace Base64 with <B64>
EnableJWT bool `json:"enable_jwt"` // Replace JWTs with <JWT>
}
TokenizerConfig controls which high-entropy patterns to replace
func DefaultTokenizerConfig ¶
func DefaultTokenizerConfig() TokenizerConfig
DefaultTokenizerConfig returns a configuration with all patterns enabled
type TokenizerStats ¶
type TokenizerStats struct {
JWTCount int64
UUIDCount int64
HashCount int64
Base64Count int64
BytesSaved int64
}
TokenizerStats tracks tokenization statistics for observability
type ValueOutlier ¶
type ValueOutlier struct {
FieldPath string `json:"field_path"`
Value float64 `json:"value"`
ExpectedMean float64 `json:"expected_mean"`
StdDev float64 `json:"std_dev"`
ZScore float64 `json:"z_score"`
SampleCount int64 `json:"sample_count"`
}
ValueOutlier represents a detected numeric value anomaly
func CheckForOutliers ¶
func CheckForOutliers(
	registry *NumericStatsRegistry,
	streamID string,
	event []byte,
	kSigma float64,
	updateStats bool,
) ([]ValueOutlier, error)
CheckForOutliers analyzes numeric fields in an event against historical stats. Returns any fields that exceed the k-sigma threshold.