Reisebuchungsseiten blockieren automatisierte Preisabfragen aggressiv mit CAPTCHAs und Bot-Erkennung. Mit CaptchaAI können Sie Flug- und Hotelpreise trotz dieser Abwehrmechanismen zuverlässig überwachen.
CAPTCHA-Landschaft auf Reiseseiten
| Site-Kategorie | CAPTCHA-Typ | Schwierigkeit |
|---|---|---|
| Fluggesellschaften (direkt) | reCAPTCHA v3, Cloudflare | Mittel |
| OTAs (Expedia, Booking) | reCAPTCHA v2, Turnstile | Mittel bis hoch |
| Metasuche (Google Flights, Kayak) | reCAPTCHA v3 | Mittel |
| Billigflieger | Bild-CAPTCHA, reCAPTCHA | Niedrig-Mittel |
| Hotelaggregatoren | Cloudflare Challenge | Hoch |
Implementierung des Tarifmonitors
import requests
import time
import re
import json
import os
from datetime import datetime, timedelta
# CaptchaAI API key, read from the CAPTCHAAI_API_KEY environment variable
# when the module is loaded; a missing variable raises KeyError.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
def solve_captcha(params, poll_interval=5, max_polls=60, request_timeout=30):
    """Submit a CAPTCHA task to CaptchaAI and poll until it is solved.

    Args:
        params: Task parameters sent to ``in.php`` (for example ``method``,
            ``googlekey``/``sitekey`` and ``pageurl``). The mapping is
            copied before the API key is added, so the caller's dict is
            left untouched.
        poll_interval: Seconds to sleep before each poll of ``res.php``.
        max_polls: Maximum number of polls before giving up.
        request_timeout: Timeout in seconds for each HTTP request.

    Returns:
        The solution string: everything after the first ``|`` of the
        ``OK|...`` reply.

    Raises:
        Exception: If the submit reply does not start with ``OK|`` or a
            poll returns anything other than ``CAPCHA_NOT_READY`` or
            ``OK|...``.
        TimeoutError: If the task is still not ready after ``max_polls``
            polls.
    """
    # Build a new dict instead of writing the key into the caller's mapping.
    payload = {**params, "key": API_KEY}
    submit = requests.get(
        "https://ocr.captchaai.com/in.php",
        params=payload,
        timeout=request_timeout,  # never hang forever on a stalled socket
    )
    if not submit.text.startswith("OK|"):
        raise Exception(f"Submit: {submit.text}")
    task_id = submit.text.split("|")[1]

    poll_params = {"key": API_KEY, "action": "get", "id": task_id}
    for _ in range(max_polls):
        time.sleep(poll_interval)
        result = requests.get(
            "https://ocr.captchaai.com/res.php",
            params=poll_params,
            timeout=request_timeout,
        )
        if result.text == "CAPCHA_NOT_READY":
            continue
        if result.text.startswith("OK|"):
            # maxsplit=1 keeps any "|" characters inside the token intact.
            return result.text.split("|", 1)[1]
        raise Exception(f"Solve: {result.text}")

    raise TimeoutError(
        f"CAPTCHA task {task_id} not solved after {max_polls} polls"
    )
class FareMonitor:
    """Fetch travel pages, extract dollar prices and track them over time.

    Pages are fetched through a shared ``requests.Session``; when a page
    contains a reCAPTCHA or Cloudflare Turnstile widget, the challenge is
    solved via ``solve_captcha`` and the token is posted back to the URL.
    Every fare check is appended to ``self.history``.
    """

    # Seconds to wait for a single page request before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create the HTTP session and an empty fare history."""
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/120.0.0.0"
        )
        self.history = []

    def fetch_with_captcha(self, url):
        """Fetch a travel page, solving CAPTCHAs if encountered.

        Args:
            url: Page URL to GET (and to POST the solved token back to).

        Returns:
            The text of the last response received.
        """
        resp = self.session.get(url, timeout=self.REQUEST_TIMEOUT)

        # reCAPTCHA v2/v3. Turnstile widgets expose ``data-sitekey`` too, so
        # pages containing "cf-turnstile" are left to the Turnstile branch
        # below instead of being submitted as a reCAPTCHA task.
        match = re.search(
            r'data-sitekey=["\']([A-Za-z0-9_-]+)["\']', resp.text
        )
        if match and "cf-turnstile" not in resp.text:
            site_key = match.group(1)
            task = {
                "method": "userrecaptcha",
                "googlekey": site_key,
                "pageurl": url,
            }
            # The v3 script is loaded with an explicit ``render=`` parameter.
            if "recaptcha/api.js?render=" in resp.text:
                task["version"] = "v3"
                task["action"] = "search"
            token = solve_captcha(task)
            resp = self.session.post(
                url,
                data={"g-recaptcha-response": token},
                timeout=self.REQUEST_TIMEOUT,
            )

        # Cloudflare Turnstile (checked on the latest response, which may be
        # the page returned after a reCAPTCHA submission).
        if "cf-turnstile" in resp.text:
            match = re.search(
                r'data-sitekey=["\']([^"\']+)', resp.text
            )
            if match:
                token = solve_captcha({
                    "method": "turnstile",
                    "sitekey": match.group(1),
                    "pageurl": url,
                })
                resp = self.session.post(
                    url,
                    data={"cf-turnstile-response": token},
                    timeout=self.REQUEST_TIMEOUT,
                )

        return resp.text

    def check_fares(self, routes):
        """Check fares for a list of routes.

        Args:
            routes: Iterable of dicts with ``origin``, ``destination``,
                ``date`` and ``url`` keys.

        Returns:
            A list with one result dict per route that was processed
            without error; each result is also appended to
            ``self.history``. Failed routes are reported on stdout and
            skipped.
        """
        results = []
        for route in routes:
            try:
                html = self.fetch_with_captcha(route["url"])
                prices = self._extract_prices(html)

                result = {
                    "route": f"{route['origin']}-{route['destination']}",
                    "date": route["date"],
                    "prices": prices,
                    "min_price": min(prices) if prices else None,
                    "timestamp": datetime.utcnow().isoformat(),
                }
                results.append(result)
                self.history.append(result)

                if prices:
                    print(f" {result['route']} ({route['date']}): "
                          f"${min(prices)}-${max(prices)}")
                else:
                    print(f" {result['route']}: No prices found")
            except Exception as e:
                # Per-route boundary: one failing route must not abort the
                # remaining checks.
                print(f" {route.get('origin', '?')}-"
                      f"{route.get('destination', '?')}: ERROR - {e}")
            finally:
                # Respectful delay, applied after failures as well so an
                # error does not trigger an immediate follow-up request.
                time.sleep(3)

        return results

    def _extract_prices(self, html):
        """Extract prices from travel page HTML.

        Args:
            html: Page text to scan for ``$``-prefixed amounts.

        Returns:
            Sorted list of distinct prices strictly between 20 and 10000.
        """
        prices = set()
        # The amount must start with a digit: a bare "$," (common in inline
        # JavaScript such as ``function($, window)``) would otherwise be
        # captured as "," and make float() raise ValueError.
        for match in re.finditer(
            r'\$\s*(\d[\d,]*(?:\.\d{2})?)', html
        ):
            price = float(match.group(1).replace(",", ""))
            if 20 < price < 10000:  # Filter noise
                prices.add(price)
        return sorted(prices)

    def detect_price_drops(self, threshold_pct=5):
        """Detect significant price drops in history.

        Compares the two most recent non-empty minimum prices recorded for
        each route/date pair.

        Args:
            threshold_pct: Minimum drop, in percent, that raises an alert.

        Returns:
            A list of alert dicts with ``route`` (``"<route>_<date>"``),
            ``previous``, ``current`` and ``change`` (formatted percent).
        """
        route_prices = {}
        for entry in self.history:
            key = f"{entry['route']}_{entry['date']}"
            series = route_prices.setdefault(key, [])
            # Falsy minimums (None when no price was found) are skipped; this
            # also keeps a zero out of the division below.
            if entry["min_price"]:
                series.append(entry["min_price"])

        alerts = []
        for key, prices in route_prices.items():
            if len(prices) < 2:
                continue
            prev, current = prices[-2], prices[-1]
            change_pct = ((current - prev) / prev) * 100
            if change_pct < -threshold_pct:
                alerts.append({
                    "route": key,
                    "previous": prev,
                    "current": current,
                    "change": f"{change_pct:.1f}%",
                })
        return alerts

    def export_report(self, filename="fare_report.json"):
        """Export fare history to JSON.

        Args:
            filename: Path of the JSON file to (over)write.
        """
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(self.history, f, indent=2)
        print(f"Exported {len(self.history)} fare checks to {filename}")
# Routes to monitor: one entry per origin/destination/date, each with the
# search URL that is fetched for that route.
routes = [
    dict(
        origin="JFK",
        destination="LAX",
        date="2025-03-15",
        url="https://example-airline.com/flights?from=JFK&to=LAX&date=2025-03-15",
    ),
    dict(
        origin="SFO",
        destination="ORD",
        date="2025-03-20",
        url="https://example-airline.com/flights?from=SFO&to=ORD&date=2025-03-20",
    ),
]

# Run one monitoring pass over all routes, then write the history to disk.
monitor = FareMonitor()
results = monitor.check_fares(routes)
monitor.export_report()
Zeitgesteuerte Ausführung (Cron)
# Check fares every 4 hours
0 */4 * * * cd /opt/fare-monitor && python fare_monitor.py
Kostenanalyse
| Überwachungsebene | Routen | Prüfungen/Tag | CAPTCHAs/Tag | Geschätzte Kosten |
|---|---|---|---|---|
| Persönlich | 5 | 6/Route | ~30 | 0,50 $ |
| Kleine Agentur | 50 | 4/Route | ~200 | 2-5 $ |
| Unternehmen | 500 | 6/Route | ~3.000 | 20-40 $ |
FAQ
Wie oft sollte ich die Tarife überprüfen?
Alle 4-6 Stunden für den persönlichen Gebrauch. Bei geschäftlicher Nutzung alle 1-2 Stunden. Fluggesellschaften aktualisieren die Preise stapelweise, sodass noch häufigere Abfragen kaum zusätzlichen Nutzen bringen.
Kann ich auch Hotelpreise überwachen?
Ja. Der gleiche Ansatz funktioniert für Booking.com-, Expedia- und Hotel-Direktseiten. Passen Sie die Preisextraktionsmuster für Hotelseitenformate an.
Wie gehe ich mit dynamischen Preisseiten um?
Einige Reiseseiten erfordern JavaScript-Rendering. Verwenden Sie Selenium oder Playwright für den Seitenabruf und dann CaptchaAI für die CAPTCHA-Lösung.
Verwandte Leitfäden
- E-Commerce-Preisüberwachung
- Scraping ohne Blockierung
- Proxy-Rotation für CAPTCHA-Scraping