# ================================================================
# Machinery Planet - Robots.txt (SEO Optimized)
# Updated: 2025-11-26
# Purpose: Allow search engines to crawl valuable content while
#          blocking duplicate/low-value pages
#
# Format notes for maintainers:
#   - Keep ONE directive per line. A "#" starts a comment that runs to
#     the end of the line, so a directive that shares a line with a
#     preceding comment is silently ignored.
#   - Rule order inside a group does not matter to RFC 9309 parsers
#     (Google, Bing): the matching rule with the longest path wins,
#     and Allow wins a tie.
# ================================================================

# ====================
# MAIN SEARCH ENGINES
# ====================
User-agent: *
Allow: /

# Block admin and development paths
Disallow: /admin
Disallow: /admin/
Disallow: /_next/
# Exceptions to the /_next/ block: the CSS/JS bundles and optimized images
# needed for rendering are served from these paths. "Disallow: /_next/" is
# longer than "Allow: /*.js" / "Allow: /*.css", so without these longer
# Allow rules the rendering assets would stay blocked.
Allow: /_next/static/
Allow: /_next/image
Disallow: /api/
Allow: /api/sitemap*.xml
Allow: /api/health
Disallow: /debug/
Disallow: /test/
Disallow: /staging/

# ====================
# SEARCH & FILTERING
# ====================
# Category, brand and product-type landing pages (/search?categories=,
# ?childCategories=, ?make=, ?model=, ?productType=) and the bare /search
# page are crawlable through "Allow: /" above and need no rule of their own.
# Do NOT add explicit Allow rules for them: a rule such as
# "Allow: /search?categories=*" is longer than "Disallow: /search?*sort=*",
# so it would win and re-open every sorted/paginated variant of that page.

# Block duplicate content parameters
Disallow: /search?*sort=*
Disallow: /search?*filter=*
Disallow: /search?*page=*
Disallow: /search?*limit=*
Disallow: /search?*offset=*
Disallow: /search?*view=*

# Block search with multiple filter combinations (low value):
# three or more "&" means four or more query parameters.
Disallow: /search?*&*&*&*

# ====================
# STATIC ASSETS
# ====================
# Allow crawling of important assets for proper rendering
Allow: /images/
Allow: /icons/
Allow: /fonts/
Allow: /*.css
Allow: /*.js
Allow: /*.woff
Allow: /*.woff2
Allow: /*.jpg
Allow: /*.jpeg
Allow: /*.png
Allow: /*.webp
Allow: /*.avif
Allow: /*.svg
Allow: /*.gif
Allow: /favicon.ico
Allow: /robots.txt
Allow: /sitemap*.xml

# ====================
# PRIVATE DIRECTORIES
# ====================
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
Disallow: /.git/
Disallow: /node_modules/
Disallow: /.next/

# ====================
# SPECIAL USER AGENTS
# ====================
# Block AI crawlers (GPT, Claude, etc.)
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: CCBot
Disallow: /

# Legacy Anthropic token, kept for older agents.
User-agent: anthropic-ai
Disallow: /

# Token Anthropic documents for its current crawler.
User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: PerplexityBot
Disallow: /

# ====================
# SEO TOOL CRAWLERS
# ====================
# Allow but rate-limit aggressive SEO crawlers.
# NOTE(review): a crawler that matches a named group ignores the
# "User-agent: *" group entirely, so none of the Disallow rules defined
# there apply to these bots - confirm that unrestricted access is intended.
# Crawl-delay is a non-standard field; only crawlers that choose to support
# it will honor it.
User-agent: SemrushBot
Crawl-delay: 5
Allow: /

User-agent: AhrefsBot
Crawl-delay: 5
Allow: /

User-agent: DotBot
Crawl-delay: 5
Allow: /

User-agent: Screaming Frog SEO Spider
Allow: /

# ====================
# BAD BOTS (Optional)
# ====================
# Block known bad bots/scrapers
User-agent: MJ12bot
Disallow: /

# Slow down Baiduspider. SemrushBot and AhrefsBot are intentionally NOT
# listed in this group: each already has its own group above, and naming a
# user agent in a second group with a different Crawl-delay is ambiguous.
User-agent: Baiduspider
Crawl-delay: 10

# ====================
# SITEMAPS
# ====================
# Only reference sitemaps for THIS domain
Sitemap: https://www.machineryplanet.ae/api/sitemap-index.xml
Sitemap: https://www.machineryplanet.ae/api/sitemap.xml
Sitemap: https://www.machineryplanet.ae/api/sitemap-products.xml
Sitemap: https://www.machineryplanet.ae/api/sitemap-categories.xml
Sitemap: https://www.machineryplanet.ae/api/sitemap-blogs.xml
Sitemap: https://www.machineryplanet.ae/api/sitemap-images.xml

# ====================
# HOST PREFERENCE
# ====================
# Preferred domain (www version).
# NOTE(review): "Host" is a non-standard field that most crawlers ignore;
# the preferred domain should also be enforced with redirects/canonicals.
Host: www.machineryplanet.ae

# ====================
# NOTES FOR DEVELOPERS
# ====================
# 1. This file allows Google to crawl 12,800+ product/category pages
# 2. Blocks only duplicate/filtered versions to save crawl budget
# 3. AI crawlers blocked to prevent content scraping
# 4. SEO crawlers rate-limited but allowed for auditing
# 5. All sitemaps reference THIS domain only (no cross-domain refs)
# ================================================================