# Melting Ice Cream Rush - robots.txt
# Optimized for multi-language SPA with canonical consolidation
#
# How to read this file:
#   * One directive per line; blank lines and "#" comments are ignored.
#   * A crawler obeys ONLY the most specific User-agent group that names it.
#     It does not additionally inherit rules from "User-agent: *", so any
#     exclusion that must apply to a named crawler has to appear in that
#     crawler's own group.

# ------------------------------------------------------------
# Default policy (also applied to Googlebot)
# ------------------------------------------------------------
# Googlebot shares this group so that the duplicate-content exclusions below
# actually apply to it. Crawl-delay is not a directive Googlebot supports,
# so none is set here.
User-agent: Googlebot
User-agent: *
Allow: /
# Block language/category query-parameter URLs to prevent duplicate content
# indexing. All language variants use path-based URLs (e.g., /es/recetas),
# not query params.
# NOTE(review): a crawler that is disallowed from a URL never fetches it and
# therefore cannot see a rel=canonical on it - confirm blocking (rather than
# canonicalizing) is what is wanted for these URLs.
Disallow: /*?lang=
Disallow: /*&lang=
Disallow: /*?category=
Disallow: /*&category=
# Block print pages from being crawled.
# NOTE(review): these pages reportedly carry a noindex tag, but a crawler
# blocked here never fetches the page and so never sees that tag. Disallow
# and noindex do not reinforce each other - confirm which one is intended.
# NOTE(review): rules are prefix matches, so "/*/print" also matches paths
# such as "/x/printable-..." - verify no public URL is caught by accident.
Disallow: /*/print
Disallow: /*/print/
Disallow: /recipes/*/print
Disallow: /recipes/*/print/
# Block auth pages.
# NOTE(review): "/auth" is a prefix match and would also cover e.g.
# "/authors" - verify against the site's real routes.
Disallow: /auth
Disallow: /*/auth
# Block internal prerender JSON data files (not user-facing content).
Disallow: /prerender-*.json
Disallow: /*.json$

# ------------------------------------------------------------
# Bingbot: same exclusions as the default policy, plus a crawl delay
# ------------------------------------------------------------
# The rules are repeated because Bingbot reads only this group.
# Keep this list in sync with the default policy above.
User-agent: Bingbot
Allow: /
Crawl-delay: 2
Disallow: /*?lang=
Disallow: /*&lang=
Disallow: /*?category=
Disallow: /*&category=
Disallow: /*/print
Disallow: /*/print/
Disallow: /recipes/*/print
Disallow: /recipes/*/print/
Disallow: /auth
Disallow: /*/auth
Disallow: /prerender-*.json
Disallow: /*.json$

# ------------------------------------------------------------
# Social link-preview fetchers: unrestricted
# ------------------------------------------------------------
# Left fully open (as before), presumably so that a preview can be rendered
# for any shared URL - TODO confirm this is intentional.
User-agent: Twitterbot
User-agent: facebookexternalhit
Allow: /

# ============================================================
# AI / LLM crawlers and fetchers - blocked to protect content
# These bots scrape sites to train commercial language models
# ============================================================
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Google-Extended
User-agent: CCBot
User-agent: Bytespider
User-agent: Amazonbot
User-agent: Applebot-Extended
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Diffbot
User-agent: FacebookBot
User-agent: meta-externalagent
User-agent: ImagesiftBot
User-agent: MistralAI
User-agent: img2dataset
User-agent: Omgilibot
User-agent: Omgili
User-agent: YouBot
Disallow: /
User-agent: PetalBot
User-agent: Timpibot
User-agent: Webzio-Extended
User-agent: Operator
User-agent: xAI-Grok
User-agent: xAI-Images
User-agent: KagiBot
User-agent: You.com
User-agent: Scrapy
User-agent: magpie-crawler
User-agent: DataForSeoBot
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: SeekportBot
Disallow: /

# ------------------------------------------------------------
# SEO / marketing scrapers and impression-inflating bots
# Denied site-wide: their fake page views drag down AdSense RPM.
# ------------------------------------------------------------
User-agent: BLEXBot
User-agent: SerpstatBot
User-agent: LinkpadBot
User-agent: SurdotlyBot
User-agent: Sogou
User-agent: Sogou web spider
User-agent: SiteAuditBot
User-agent: SplitSignalBot
User-agent: ZoominfoBot
User-agent: BrightEdge
User-agent: Barkrowler
User-agent: ICC-Crawler
User-agent: VelenPublicWebCrawler
User-agent: TurnitinBot
User-agent: NetEstate
User-agent: Mediatoolkitbot
User-agent: trendictionbot
User-agent: ev-crawler
Disallow: /

# Pinterestbot is explicitly permitted everywhere.
User-agent: Pinterestbot
Allow: /

# Headless browsers / automation tooling that frequently fakes traffic.
User-agent: HeadlessChrome
User-agent: PhantomJS
User-agent: SlimerJS
Disallow: /

# Sitemap index - points at every per-section sitemap.
Sitemap: https://meltingicecreamrush.com/sitemap.xml

# IndexNow key (used by Bing, Yandex, Seznam, and Naver)
# Key file: https://meltingicecreamrush.com/c7f5e2a4b8d94f3e9c1a6b7d8e2f4a5c.txt