Skip to main content
Keep real user data out of your traces. Your traces carry messages, prompts, and responses — anything sensitive in them follows into Arize. Use masking to hide entire categories (all inputs, all outputs, embedding vectors) and redaction to remove specific patterns (emails, SSNs, credit cards) before spans leave your application.

Mask Span Attributes

The OpenInference specification defines environment variables to control what data is captured. You can also set them in code via TraceConfig.
| Environment Variable | Effect | Default |
| --- | --- | --- |
| OPENINFERENCE_HIDE_INPUTS | Hides input value, all input messages & embedding input text | False |
| OPENINFERENCE_HIDE_OUTPUTS | Hides output value & all output messages | False |
| OPENINFERENCE_HIDE_INPUT_MESSAGES | Hides all input messages & embedding input text | False |
| OPENINFERENCE_HIDE_OUTPUT_MESSAGES | Hides all output messages | False |
| OPENINFERENCE_HIDE_INPUT_IMAGES | Hides images from input messages | False |
| OPENINFERENCE_HIDE_INPUT_TEXT | Hides text from input messages & input embeddings | False |
| OPENINFERENCE_HIDE_OUTPUT_TEXT | Hides text from output messages | False |
| OPENINFERENCE_HIDE_EMBEDDING_VECTORS | Hides returned embedding vectors | False |
| OPENINFERENCE_HIDE_LLM_INVOCATION_PARAMETERS | Hides LLM invocation parameters (temperature, top_p, etc.) | False |
| OPENINFERENCE_HIDE_PROMPTS | Hides prompt strings on LLM spans | False |
| OPENINFERENCE_HIDE_CHOICES | Hides completion choices on LLM spans | False |
| OPENINFERENCE_BASE64_IMAGE_MAX_LENGTH | Limits characters of a base64 encoding of an image | 32,000 |
Precedence order: TraceConfig in code > environment variables > defaults.
from openinference.instrumentation import TraceConfig
from openinference.instrumentation.openai import OpenAIInstrumentor

# Masking settings expressed in code. Per the precedence order, values passed
# to TraceConfig win over the OPENINFERENCE_* environment variables, which in
# turn win over the defaults.
config = TraceConfig(
    hide_inputs=False,
    hide_outputs=False,
    hide_input_messages=False,
    hide_output_messages=False,
    hide_input_images=True,  # hide images from input messages
    hide_input_text=False,
    hide_output_text=False,
    hide_embedding_vectors=True,  # hide returned embedding vectors
    base64_image_max_length=5000,  # tighter than the documented 32,000 default
)

# NOTE(review): `tracer_provider` is not defined in this snippet; it is
# presumably created during earlier tracing setup — confirm.
OpenAIInstrumentor().instrument(
    tracer_provider=tracer_provider,
    config=config,
)
Masking hides entire attribute categories. For finer control — scrubbing specific patterns like emails or SSNs while keeping useful context — write a custom span processor:

Redact PII with Custom Span Processors

For finer-grained control, create a custom span processor that detects and redacts PII patterns before spans are exported. This runs in your application — sensitive data never leaves your infrastructure.

Regex-Based Redaction

This processor supports: email addresses, phone numbers, SSNs, credit card numbers, IP addresses, and dates of birth. Add your own patterns as needed.
import re
import json
from typing import Any, Dict, Optional
from opentelemetry.sdk.trace import SpanProcessor
from opentelemetry.sdk.trace.export import SpanExporter
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace import Span

class PIIRedactingSpanProcessor(SpanProcessor):
    """Span processor that regex-scrubs PII from spans before exporting them.

    When a span ends, its name and attribute values are scanned with a set of
    compiled regular expressions and every match is replaced by a
    ``[REDACTED_<PATTERN_NAME>]`` placeholder. A redacted copy of the span is
    then handed to the wrapped exporter.

    Limitations visible in this implementation:
      * Span events and links are forwarded unchanged (not redacted).
      * Attribute keys, dictionary keys, and non-string scalars (int, float,
        bool, None) are never redacted.
      * The exporter is invoked synchronously, once per ended span.

    Args:
        exporter: Exporter that receives the redacted spans.
        pii_patterns: Optional mapping of pattern name -> regex source. Entries
            are merged over the built-in patterns, so a name that matches a
            built-in one overrides it.
    """

    # Attribute keys copied through without redaction.
    _PASSTHROUGH_KEYS = frozenset(
        {"service.name", "telemetry.sdk.name", "telemetry.sdk.version"}
    )

    def __init__(self, exporter: SpanExporter, pii_patterns: Optional[Dict[str, str]] = None):
        self._exporter = exporter
        self._default_patterns = {
            "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            "phone": r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b",
            "ssn": r"\b\d{3}-\d{2}-\d{4}\b",
            "credit_card": r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b",
            "ip_address": r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
            "date_of_birth": r"\b\d{2}[-/]\d{2}[-/]\d{4}\b",
        }
        # Caller-supplied patterns extend (or override by name) the defaults.
        self._patterns = {**self._default_patterns, **(pii_patterns or {})}
        # Compile once here; the patterns run against every attribute of every span.
        self._compiled_patterns = {
            name: re.compile(pattern) for name, pattern in self._patterns.items()
        }

    def _redact_string(self, value: str) -> str:
        """Return ``value`` with every pattern match replaced by its placeholder."""
        redacted = value
        # Patterns are applied in dict insertion order (defaults first).
        for pattern_name, pattern in self._compiled_patterns.items():
            redacted = pattern.sub(f"[REDACTED_{pattern_name.upper()}]", redacted)
        return redacted

    def _redact_value(self, value: Any) -> Any:
        """Recursively redact strings found in ``value``.

        Strings holding a JSON object or array are parsed, redacted
        element-wise, and re-serialized. All other strings are redacted as
        plain text. Containers keep their type; int/float/bool/None are
        returned untouched.
        """
        if isinstance(value, str):
            try:
                parsed = json.loads(value)
            except (json.JSONDecodeError, RecursionError):
                return self._redact_string(value)
            # Only descend into real JSON containers. A bare scalar such as
            # "5551234567" is valid JSON (an int), and treating it as JSON
            # would let digit-only phone or card numbers skip redaction.
            if not isinstance(parsed, (dict, list)):
                return self._redact_string(value)
            return json.dumps(self._redact_value(parsed))
        if isinstance(value, dict):
            return {k: self._redact_value(v) for k, v in value.items()}
        if isinstance(value, list):
            return [self._redact_value(item) for item in value]
        if isinstance(value, tuple):
            # Sequence attributes may arrive as tuples; keep them as sequences
            # instead of collapsing them into their repr string.
            return tuple(self._redact_value(item) for item in value)
        if isinstance(value, (int, float, bool, type(None))):
            return value
        return self._redact_string(str(value))

    def on_start(self, span: Span, parent_context=None) -> None:
        """No-op: redaction happens when the span ends."""

    def on_end(self, span: ReadableSpan) -> None:
        """Build a redacted copy of ``span`` and export it immediately."""
        redacted_attributes = {
            key: value if key in self._PASSTHROUGH_KEYS else self._redact_value(value)
            for key, value in (span.attributes or {}).items()
        }
        # The ended span is not modified here; a new ReadableSpan carrying the
        # redacted name and attributes is built and exported instead.
        redacted_span = ReadableSpan(
            name=self._redact_string(span.name),
            context=span.get_span_context(),
            parent=span.parent,
            resource=span.resource,
            attributes=redacted_attributes,
            events=span.events,
            links=span.links,
            kind=span.kind,
            status=span.status,
            start_time=span.start_time,
            end_time=span.end_time,
            instrumentation_scope=span.instrumentation_scope,
        )
        self._exporter.export([redacted_span])

    def shutdown(self) -> None:
        """Shut down the wrapped exporter."""
        self._exporter.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Flush the wrapped exporter and return its result.

        Returning the exporter's result (rather than ``None``) lets callers
        that check the flush outcome see the real status.
        """
        return self._exporter.force_flush(timeout_millis)

Wire It Up

from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.resources import Resource

# NOTE(review): `trace_attributes` (resource attributes) and `endpoint`
# (OTLP collector address) are not defined in this snippet; they are
# presumably set during earlier tracing setup — confirm.
tracer_provider = trace_sdk.TracerProvider(
    resource=Resource(attributes=trace_attributes)
)

# PII processor redacts before exporting
pii_processor = PIIRedactingSpanProcessor(OTLPSpanExporter(endpoint=endpoint))
tracer_provider.add_span_processor(pii_processor)

trace_api.set_tracer_provider(tracer_provider=tracer_provider)
# Pass the provider explicitly so the instrumentor always emits spans through
# the redacting processor, even if a different global tracer provider was
# registered earlier in the process.
OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
Arize AX trace showing PII redaction — user input with [REDACTED_EMAIL] and [REDACTED_PHONE] in place of real values
Regex works for well-structured patterns like emails, SSNs, or credit cards. For less predictable entities — names, locations, organizations — use NLP-based detection:

Microsoft Presidio Integration

For teams that need NLP-based PII detection (beyond regex), use Microsoft Presidio as a drop-in replacement for the regex patterns. Presidio supports entity types like PERSON, EMAIL_ADDRESS, PHONE_NUMBER, US_SSN, CREDIT_CARD, IP_ADDRESS, LOCATION, US_PASSPORT, and more.
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig

class PresidioRedactionSpanProcessor(SpanProcessor):
    """Span processor that redacts PII with Microsoft Presidio before export.

    Mirrors the regex-based processor: when a span ends, its name and
    attribute values are redacted and a copy is handed to the wrapped
    exporter. Detection is done by Presidio's analyzer instead of regular
    expressions, and each detected entity is replaced by
    ``[REDACTED_<ENTITY>]``.

    Span events and links are forwarded unchanged, and non-string scalars
    (int, float, bool, None) are never redacted.

    Args:
        exporter: Exporter that receives the redacted spans.
        entities: Presidio entity types to detect; defaults to a built-in list.
        language: Language code used both to configure the NLP engine and to
            analyze text.
        model_name: spaCy model to load for ``language``. The default is an
            English model, so pass a matching model when changing ``language``.
    """

    # Attribute keys copied through without redaction.
    _PASSTHROUGH_KEYS = frozenset(
        {"service.name", "telemetry.sdk.name", "telemetry.sdk.version"}
    )

    def __init__(self, exporter, entities=None, language="en", model_name="en_core_web_lg"):
        self._exporter = exporter
        self._default_entities = [
            "PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER", "US_SSN",
            "CREDIT_CARD", "IP_ADDRESS", "DATE_TIME", "US_BANK_NUMBER",
            "US_DRIVER_LICENSE", "LOCATION", "NRP", "US_PASSPORT",
        ]
        self._entities = entities or self._default_entities
        # Remember the language so analysis uses the one the engine was built for.
        self._language = language

        nlp_configuration = {
            "nlp_engine_name": "spacy",
            "models": [{"lang_code": language, "model_name": model_name}],
        }
        nlp_engine = NlpEngineProvider(nlp_configuration=nlp_configuration).create_engine()
        self._analyzer = AnalyzerEngine(
            nlp_engine=nlp_engine,
            supported_languages=[language],
        )
        self._anonymizer = AnonymizerEngine()
        # One "replace" operator per entity type, producing a labelled placeholder.
        self._operators = {
            entity: OperatorConfig("replace", {"new_value": f"[REDACTED_{entity}]"})
            for entity in self._entities
        }

    def _redact_string(self, value):
        """Return ``value`` with every detected entity replaced by its placeholder."""
        # Skip the analyzer for empty / whitespace-only strings.
        if not value.strip():
            return value
        results = self._analyzer.analyze(
            text=value, entities=self._entities, language=self._language
        )
        if results:
            return self._anonymizer.anonymize(
                text=value, analyzer_results=results, operators=self._operators
            ).text
        return value

    def _redact_value(self, value):
        """Recursively redact strings found in ``value``.

        Strings holding a JSON object or array are parsed, redacted
        element-wise, and re-serialized. All other strings are redacted as
        plain text. Containers keep their type; int/float/bool/None are
        returned untouched.
        """
        if isinstance(value, str):
            try:
                parsed = json.loads(value)
            except (json.JSONDecodeError, RecursionError):
                return self._redact_string(value)
            # Bare JSON scalars (e.g. a digit-only string) are redacted as
            # text so they are not parsed into numbers and skipped.
            if not isinstance(parsed, (dict, list)):
                return self._redact_string(value)
            return json.dumps(self._redact_value(parsed))
        if isinstance(value, dict):
            return {k: self._redact_value(v) for k, v in value.items()}
        if isinstance(value, list):
            return [self._redact_value(item) for item in value]
        if isinstance(value, tuple):
            return tuple(self._redact_value(item) for item in value)
        if isinstance(value, (int, float, bool, type(None))):
            return value
        return self._redact_string(str(value))

    def on_start(self, span, parent_context=None):
        """No-op: redaction happens when the span ends."""

    def on_end(self, span):
        """Build a redacted copy of ``span`` and export it immediately.

        Same flow as the regex processor, with Presidio doing the detection
        through ``self._redact_string``.
        """
        redacted_attributes = {
            key: value if key in self._PASSTHROUGH_KEYS else self._redact_value(value)
            for key, value in (span.attributes or {}).items()
        }
        redacted_span = ReadableSpan(
            name=self._redact_string(span.name),
            context=span.get_span_context(),
            parent=span.parent,
            resource=span.resource,
            attributes=redacted_attributes,
            events=span.events,
            links=span.links,
            kind=span.kind,
            status=span.status,
            start_time=span.start_time,
            end_time=span.end_time,
            instrumentation_scope=span.instrumentation_scope,
        )
        self._exporter.export([redacted_span])

    def shutdown(self):
        """Shut down the wrapped exporter."""
        self._exporter.shutdown()

    def force_flush(self, timeout_millis=30000):
        """Flush the wrapped exporter and return its result."""
        return self._exporter.force_flush(timeout_millis)

Next step

Deploy at scale with OTEL Collector patterns, async tracing, and custom sampling:

Next: Advanced Tracing Patterns