# robots.txt for sanatan365.com
# Last updated: 2026-04-21

# ===========================================
# Crawler Access Policy — single shared rule group
# ===========================================
# A crawler obeys ONLY the group whose User-agent line best matches it
# (RFC 9309, section 2.2.1); it does not additionally inherit the rules
# listed under "User-agent: *". Every crawler is therefore named on this one
# group so that the blocked paths below apply to all of them, not only to
# crawlers without a named entry.
#
# Maintenance rules:
#   * Do not add a separate "User-agent: <name>" group elsewhere in this
#     file unless it repeats the Disallow rules — it would silently exempt
#     that crawler from them.
#   * Keep "User-agent: *" in exactly one group; some parsers honor only the
#     first "*" group they encounter.
#   * Do not insert blank lines inside the group; some parsers treat a blank
#     line as the end of a group.
#
# --- Search engine crawlers ---
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
# --- LLM / AI crawlers (explicitly allowed) ---
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Anthropic-AI
User-agent: PerplexityBot
User-agent: OAI-SearchBot
User-agent: Google-Extended
User-agent: Bytespider
User-agent: CCBot
# --- Default: every other crawler ---
User-agent: *
# Everything is crawlable unless a longer (more specific) rule below matches.
Allow: /
# Allow the SEO discovery files even though they live under /api/.
# The longest matching rule wins, so these beat the broader "Disallow: /api/".
Allow: /api/sitemap.xml
Allow: /api/llms.txt
Allow: /api/llms-full.txt
# Blocked paths (internal/technical routes).
Disallow: /api/
Disallow: /admin/
Disallow: /_next/data/
# NOTE(review): if pages embed images through /_next/image URLs, this rule
# also keeps those images out of image search — confirm that is intended.
Disallow: /_next/image
# /_next/static/ is intentionally NOT disallowed: it serves the JS/CSS bundles
# that rendering crawlers (e.g. Googlebot, Bingbot) must fetch to render pages.

# ===========================================
# Sitemap
# ===========================================
# Dynamic source of truth (refreshes within 1h of any new article publish).
# NOTE(review): listed first by convention only — the order of Sitemap lines
# is not known to give one sitemap priority over another; crawlers may fetch
# either or both. Confirm before relying on ordering.
Sitemap: https://sanatan365.com/api/sitemap.xml
# Static fallback for crawlers that read only a single Sitemap directive
# or that do not fetch URLs under /api/. Refreshes on each manual deploy.
Sitemap: https://sanatan365.com/sitemap.xml