DevOps & Skalierung

OpenTelemetry-Tracing für CAPTCHA-Lösungspipelines

OpenTelemetry (OTel) ermöglicht herstellerneutrales verteiltes Tracing. Instrumentieren Sie Ihre CAPTCHA-Lösungspipeline einmal und exportieren Sie die Traces nach Jaeger, Zipkin, Datadog oder in ein anderes OTel-kompatibles Backend. So sehen Sie genau, wo Zeit verbraucht wird – bei der API-Übermittlung, beim Polling oder durch Netzwerklatenz.

Trace-Struktur

[Scrape Page]
  └── [Solve CAPTCHA]                    ← Parent span
        ├── [Submit Task]                ← HTTP POST to in.php
        ├── [Poll Result]               ← Repeated GET to res.php
        │     ├── [Poll Attempt 1]       ← CAPCHA_NOT_READY
        │     ├── [Poll Attempt 2]       ← CAPCHA_NOT_READY
        │     └── [Poll Attempt 3]       ← OK (solution)
        └── [Apply Token]               ← Inject into form

Python – OpenTelemetry-Instrumentierung

Einrichtung

pip install opentelemetry-api opentelemetry-sdk \
    opentelemetry-exporter-otlp \
    opentelemetry-instrumentation-requests

Umsetzung

import os
import time
import requests
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
    OTLPSpanExporter,
)
from opentelemetry.sdk.resources import Resource
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.trace import StatusCode

# Tracer provider tagged with the service name attached to every span it emits.
resource = Resource.create({"service.name": "captcha-pipeline"})
provider = TracerProvider(resource=resource)

# Spans are batched and shipped over OTLP/gRPC. The target is read from
# OTEL_EXPORTER_OTLP_ENDPOINT and falls back to a collector on localhost.
_otlp_endpoint = os.environ.get(
    "OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"
)
exporter = OTLPSpanExporter(endpoint=_otlp_endpoint)
provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(provider)

# Instrument the `requests` library so its HTTP calls are traced as well.
RequestsInstrumentor().instrument()

# Shared objects used by the solver: named tracer, API key (must be present
# in the environment, otherwise KeyError is raised here) and one HTTP session.
tracer = trace.get_tracer("captchaai.solver")
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
session = requests.Session()


def _mark_failed(error, *spans):
    """Flag each given span as failed and tag it with the error code."""
    for span in spans:
        span.set_status(StatusCode.ERROR, error)
        span.set_attribute("captcha.error", error)


def solve_captcha(sitekey, pageurl, captcha_type="recaptcha_v2", *,
                  poll_interval=5, max_polls=60, request_timeout=30):
    """Solve a CAPTCHA with full OpenTelemetry tracing.

    Emits a ``captcha.solve`` parent span with ``captcha.submit``,
    ``captcha.poll`` and one ``captcha.poll.attempt`` child span per poll.

    Args:
        sitekey: Site key of the target CAPTCHA (sent as ``googlekey``).
        pageurl: URL of the page that hosts the CAPTCHA.
        captcha_type: Label stored in the ``captcha.type`` span attribute.
        poll_interval: Seconds to wait before each poll attempt.
        max_polls: Maximum number of poll attempts before giving up.
        request_timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        ``{"solution", "elapsed", "polls"}`` on success, where ``elapsed`` is
        the time spent in the polling phase, or ``{"error": code}`` when the
        API reports an error or the poll budget is exhausted (``"TIMEOUT"``).

    Transport and JSON-decoding errors are not caught here; they propagate
    to the caller through the enclosing span context managers.
    """
    with tracer.start_as_current_span(
        "captcha.solve",
        attributes={
            "captcha.type": captcha_type,
            "captcha.target_url": pageurl,
        },
    ) as solve_span:

        # Submit phase
        with tracer.start_as_current_span("captcha.submit") as submit_span:
            resp = session.post(
                "https://ocr.captchaai.com/in.php",
                data={
                    "key": API_KEY,
                    "method": "userrecaptcha",
                    "googlekey": sitekey,
                    "pageurl": pageurl,
                    "json": 1,
                },
                # Without a timeout a stalled connection would block forever.
                timeout=request_timeout,
            )
            # Record the HTTP status before parsing so it is kept on the span
            # even when the body turns out not to be valid JSON.
            submit_span.set_attribute("http.status_code", resp.status_code)
            data = resp.json()

            if data.get("status") != 1:
                error = data.get("request", "UNKNOWN")
                _mark_failed(error, submit_span, solve_span)
                return {"error": error}

            captcha_id = data["request"]
            submit_span.set_attribute("captcha.id", captcha_id)
            solve_span.set_attribute("captcha.id", captcha_id)

        # Poll phase
        with tracer.start_as_current_span("captcha.poll") as poll_span:
            poll_count = 0
            # Monotonic clock: the duration must not jump if wall time changes.
            poll_start = time.monotonic()

            for poll_count in range(1, max_polls + 1):
                time.sleep(poll_interval)

                with tracer.start_as_current_span(
                    "captcha.poll.attempt",
                    attributes={"captcha.poll.number": poll_count},
                ) as attempt_span:
                    result = session.get(
                        "https://ocr.captchaai.com/res.php",
                        params={
                            "key": API_KEY,
                            "action": "get",
                            "id": captcha_id,
                            "json": 1,
                        },
                        timeout=request_timeout,
                    ).json()

                    elapsed = time.monotonic() - poll_start
                    ready = result.get("status") == 1
                    attempt_span.set_attribute("captcha.poll.ready", ready)

                    if ready:
                        poll_span.set_attribute("captcha.poll.count", poll_count)
                        poll_span.set_attribute(
                            "captcha.poll.duration_s", round(elapsed, 2)
                        )
                        solve_span.set_attribute(
                            "captcha.solve_time_s", round(elapsed, 2)
                        )
                        solve_span.set_status(StatusCode.OK)
                        return {
                            "solution": result["request"],
                            "elapsed": elapsed,
                            "polls": poll_count,
                        }

                    # Anything other than "not ready yet" is a terminal error.
                    if result.get("request") != "CAPCHA_NOT_READY":
                        error = result.get("request", "UNKNOWN")
                        poll_span.set_attribute("captcha.poll.count", poll_count)
                        poll_span.set_attribute(
                            "captcha.poll.duration_s", round(elapsed, 2)
                        )
                        _mark_failed(error, attempt_span, poll_span, solve_span)
                        return {"error": error}

            # Poll budget exhausted without a solution.
            poll_span.set_attribute("captcha.poll.count", poll_count)
            poll_span.set_attribute(
                "captcha.poll.duration_s",
                round(time.monotonic() - poll_start, 2),
            )
            _mark_failed("TIMEOUT", poll_span, solve_span)
            return {"error": "TIMEOUT"}

JavaScript – OpenTelemetry-Instrumentierung

Einrichtung

npm install @opentelemetry/api @opentelemetry/sdk-node \
    @opentelemetry/sdk-trace-node \
    @opentelemetry/exporter-trace-otlp-grpc \
    @opentelemetry/instrumentation-http

Umsetzung

const { NodeSDK } = require("@opentelemetry/sdk-node");
const { OTLPTraceExporter } = require("@opentelemetry/exporter-trace-otlp-grpc");
const { HttpInstrumentation } = require("@opentelemetry/instrumentation-http");
const { trace, SpanStatusCode } = require("@opentelemetry/api");
const axios = require("axios");

// OTLP target: taken from the environment, otherwise a collector on localhost.
const otlpEndpoint =
  process.env.OTEL_EXPORTER_OTLP_ENDPOINT || "http://localhost:4317";

// Start the Node SDK with the OTLP/gRPC exporter and HTTP instrumentation.
const sdk = new NodeSDK({
  serviceName: "captcha-pipeline",
  traceExporter: new OTLPTraceExporter({ url: otlpEndpoint }),
  instrumentations: [new HttpInstrumentation()],
});
sdk.start();

// Named tracer and API key shared by the solver below.
const tracer = trace.getTracer("captchaai.solver");
const API_KEY = process.env.CAPTCHAAI_API_KEY;

// Upper bound for every HTTP call so a stalled connection cannot hang the pipeline.
const REQUEST_TIMEOUT_MS = 30000;

// Flag a span as failed and tag it with the error code.
function markSpanFailed(span, err) {
  span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
  span.setAttribute("captcha.error", err.message);
}

/**
 * Solve a CAPTCHA with full OpenTelemetry tracing.
 *
 * Emits a "captcha.solve" parent span with "captcha.submit", "captcha.poll"
 * and one "captcha.poll.attempt" child span per poll.
 *
 * @param {string} sitekey - Site key of the target CAPTCHA (sent as `googlekey`).
 * @param {string} pageurl - URL of the page that hosts the CAPTCHA.
 * @param {string} [captchaType="recaptcha_v2"] - Label stored in `captcha.type`.
 * @returns {Promise<object>} `{ solution, elapsed, polls }` on success, where
 *   `elapsed` is the time spent polling in seconds, or `{ error }` with the
 *   error message on any failure (API error, transport error, "TIMEOUT").
 */
async function solveCaptchaWithTracing(sitekey, pageurl, captchaType = "recaptcha_v2") {
  return tracer.startActiveSpan("captcha.solve", {
    attributes: { "captcha.type": captchaType, "captcha.target_url": pageurl },
  }, async (solveSpan) => {
    try {
      // Submit
      const captchaId = await tracer.startActiveSpan(
        "captcha.submit",
        async (submitSpan) => {
          try {
            const resp = await axios.post("https://ocr.captchaai.com/in.php", null, {
              params: {
                key: API_KEY, method: "userrecaptcha",
                googlekey: sitekey, pageurl, json: 1,
              },
              timeout: REQUEST_TIMEOUT_MS,
            });
            submitSpan.setAttribute("http.status_code", resp.status);

            if (resp.data.status !== 1) {
              // Fall back to "UNKNOWN" so the error message is never empty.
              throw new Error(resp.data.request || "UNKNOWN");
            }

            submitSpan.setAttribute("captcha.id", resp.data.request);
            return resp.data.request;
          } catch (err) {
            // Covers API errors as well as transport failures and timeouts.
            submitSpan.recordException(err);
            markSpanFailed(submitSpan, err);
            throw err;
          } finally {
            submitSpan.end();
          }
        }
      );

      solveSpan.setAttribute("captcha.id", captchaId);

      // Poll
      return await tracer.startActiveSpan("captcha.poll", async (pollSpan) => {
        let pollCount = 0;
        const pollStart = Date.now();

        try {
          for (let i = 0; i < 60; i++) {
            await new Promise((r) => setTimeout(r, 5000));
            pollCount++;

            const result = await tracer.startActiveSpan(
              "captcha.poll.attempt",
              { attributes: { "captcha.poll.number": pollCount } },
              async (attemptSpan) => {
                try {
                  const resp = await axios.get("https://ocr.captchaai.com/res.php", {
                    params: { key: API_KEY, action: "get", id: captchaId, json: 1 },
                    timeout: REQUEST_TIMEOUT_MS,
                  });
                  const ready = resp.data.status === 1;
                  attemptSpan.setAttribute("captcha.poll.ready", ready);

                  // Anything other than "not ready yet" is a terminal error;
                  // throwing here lets the failing attempt span be flagged too.
                  if (!ready && resp.data.request !== "CAPCHA_NOT_READY") {
                    throw new Error(resp.data.request || "UNKNOWN");
                  }
                  return resp.data;
                } catch (err) {
                  attemptSpan.recordException(err);
                  markSpanFailed(attemptSpan, err);
                  throw err;
                } finally {
                  attemptSpan.end();
                }
              }
            );

            if (result.status === 1) {
              const elapsed = (Date.now() - pollStart) / 1000;
              solveSpan.setAttribute("captcha.solve_time_s", elapsed);
              solveSpan.setStatus({ code: SpanStatusCode.OK });
              return { solution: result.request, elapsed, polls: pollCount };
            }
          }
          throw new Error("TIMEOUT");
        } catch (err) {
          markSpanFailed(pollSpan, err);
          throw err;
        } finally {
          // Recorded on every exit path: success, API error and timeout.
          pollSpan.setAttribute("captcha.poll.count", pollCount);
          pollSpan.setAttribute("captcha.poll.duration_s", (Date.now() - pollStart) / 1000);
          pollSpan.end();
        }
      });
    } catch (err) {
      markSpanFailed(solveSpan, err);
      return { error: err.message };
    } finally {
      solveSpan.end();
    }
  });
}

module.exports = { solveCaptchaWithTracing };

OTel Collector-Konfiguration

# otel-collector-config.yaml
# Receives OTLP traces over gRPC, batches them and forwards them to Jaeger.
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317

processors:
  batch:
    timeout: 5s

exporters:
  # Current Collector distributions no longer ship the dedicated `jaeger`
  # exporter. Jaeger accepts OTLP directly, so traces are sent to its
  # OTLP gRPC port instead (requires OTLP ingestion to be enabled in Jaeger).
  otlp/jaeger:
    endpoint: jaeger:4317
    tls:
      insecure: true
  # Or export to Datadog, New Relic, etc.

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [otlp/jaeger]

Was Sie in Traces sehen werden

Span-Attribut Wert Einblick
captcha.type recaptcha_v2 Welche CAPTCHA-Typen dauern am längsten?
captcha.solve_time_s 24.5 Tatsächliche Lösungslatenz
captcha.poll.count 5 Wie viele Polling-Versuche wurden benötigt?
captcha.error ERROR_WRONG_CAPTCHA_ID Aufschlüsselung der Fehlertypen
captcha.id 73519... Verfolgen Sie spezifische Lösungsversuche

Fehlerbehebung

Problem Ursache Lösung
Worker ist erreichbar, verarbeitet aber keine Aufgaben Queue, Credentials oder Eingabestrom stimmen nicht Prüfen Sie Queue-Tiefe, API-Key, Health-Checks und Fehlerraten pro Worker gemeinsam
Fehlerrate steigt nach Rollout Neue Version verändert Session-, Proxy- oder Retry-Verhalten Vergleichen Sie erfolgreiche und fehlschlagende Runs zwischen alter und neuer Version und rollen Sie bei Bedarf zurück
Canary oder Health-Check bleibt rot Abhängigkeiten, Zeitlimits oder Secrets weichen von der Zielumgebung ab Prüfen Sie Secrets, Netzwerkpfade und Schwellenwerte in exakt derselben Umgebung

Verwandte Leitfäden

  • CAPTCHA-Lösungsraten mit Prometheus/Grafana überwachen
  • Strukturiertes Logging für CAPTCHA-Vorgänge
  • Zeitreihen-CAPTCHA-Performance-Trends
Kommentare sind für diesen Artikel deaktiviert.