# =============================================================
# robots.txt — Sun Shine Lawfirm (nepallawsunshine.com)
# Last updated: March 2026
#
# Format notes for maintainers:
#   * One record per line. robots.txt is line-oriented and a "#"
#     comment runs to the end of the physical line, so records
#     must never be joined onto a single line.
#   * A crawler obeys ONLY the most specific User-agent group that
#     matches it. Groups are not cumulative: a bot named in its own
#     group below does not inherit anything from "User-agent: *".
#   * When Allow and Disallow both match a URL, the longest
#     (most specific) path wins.
# =============================================================

# =============================================================
# SECTION 1 — MAJOR SEARCH ENGINES
# =============================================================

# NOTE: Google does not support Crawl-delay; the value below is
# ignored by Googlebot and is kept only as a statement of intent.
User-agent: Googlebot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Crawl-delay: 2

# An empty Disallow means "nothing is disallowed".
User-agent: Googlebot-Image
Allow: /wp-content/uploads/
Disallow:

User-agent: Googlebot-Video
Allow: /

User-agent: Storebot-Google
Disallow: /

User-agent: Google-InspectionTool
Allow: /

User-agent: Bingbot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Crawl-delay: 3

User-agent: Slurp
Allow: /
Disallow: /wp-admin/
Crawl-delay: 5

User-agent: DuckDuckBot
Allow: /
Disallow: /wp-admin/
Crawl-delay: 3

# =============================================================
# SECTION 2 — AI LANGUAGE MODEL CRAWLERS
# Full access granted for AI Overviews, citations, and answers
# =============================================================

# Google AI (Gemini / Vertex AI training and grounding).
# Google-Extended is a control token only; it does not affect
# Google Search features such as AI Overviews, which follow the
# Googlebot group in Section 1.
User-agent: Google-Extended
Allow: /

# OpenAI / ChatGPT
User-agent: GPTBot
Allow: /
Disallow: /wp-admin/

User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

# Anthropic / Claude
User-agent: ClaudeBot
Allow: /

User-agent: anthropic-ai
Allow: /

User-agent: Claude-Web
Allow: /

# Perplexity AI
User-agent: PerplexityBot
Allow: /

# Meta AI (Llama)
User-agent: Meta-ExternalAgent
Allow: /

User-agent: Meta-ExternalFetcher
Allow: /

# Cohere
User-agent: cohere-ai
Allow: /

# Apple (Siri, Apple Intelligence)
User-agent: Applebot
Allow: /

User-agent: Applebot-Extended
Allow: /

# Microsoft Copilot / Bing AI
User-agent: msnbot
Allow: /

User-agent: BingPreview
Allow: /

# Amazon Alexa / Rufus
User-agent: Amazonbot
Allow: /

# You.com
User-agent: YouBot
Allow: /

# Brave Search AI
User-agent: Brave-Search
Allow: /

# Bytedance / TikTok AI
User-agent: Bytespider
Allow: /

# Common Crawl (used to train many LLMs)
User-agent: CCBot
Allow: /

# Diffbot (used by many AI knowledge graphs)
User-agent: Diffbot
Allow: /

# Timpibot (AI search)
User-agent: Timpibot
Allow: /

# iAsk AI
User-agent: iaskspider
Allow: /

# =============================================================
# SECTION 3 — SEO & RESEARCH TOOLS
# Allow for backlink monitoring and SEO health
# =============================================================

User-agent: AhrefsBot
Allow: /
Crawl-delay: 10

User-agent: SemrushBot
Allow: /
Crawl-delay: 10

User-agent: MJ12bot
Allow: /
Crawl-delay: 10

User-agent: DotBot
Allow: /
Crawl-delay: 10

User-agent: rogerbot
Allow: /
Crawl-delay: 10

# =============================================================
# SECTION 4 — SOCIAL MEDIA PREVIEWS
# Needed for link previews when sharing content
# =============================================================

User-agent: facebookexternalhit
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: WhatsApp
Allow: /

User-agent: Slackbot
Allow: /

User-agent: TelegramBot
Allow: /

# =============================================================
# SECTION 5 — DEFAULT RULES FOR ALL OTHER CRAWLERS
# Keeps WordPress internals and junk URLs out of the crawl for
# any bot that has NO dedicated group elsewhere in this file.
# NOTE(review): bots named in Sections 1-4 do not inherit these
# rules; if a rule here must apply to one of them, repeat it in
# that bot's own group.
# (No blank lines inside this group: some legacy parsers treat a
# blank line as the end of the group.)
# =============================================================

User-agent: *
Allow: /
# Explicit Allows for key sections; being longer than the
# wildcard Disallows further down, they take precedence for
# URLs under these paths.
Allow: /our-capabilities/
Allow: /services/
Allow: /insights/
Allow: /lawyer/
Allow: /about/
Allow: /contact/
Allow: /wp-content/uploads/
#
# WordPress core — never needs to be crawled
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /xmlrpc.php
Disallow: /wp-json/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/themes/
Disallow: /wp-content/cache/
#
# WordPress junk URL patterns — duplicate content risk
Disallow: /?s=
Disallow: /?p=
Disallow: /?page_id=
Disallow: /page/
Disallow: /?attachment_id=
Disallow: /feed/
Disallow: /comments/feed/
Disallow: /trackback/
Disallow: /xmlrpc
#
# Staging, test, admin paths
Disallow: /staging/
Disallow: /dev/
Disallow: /test/
Disallow: /backup/
Disallow: /tmp/
Disallow: /log/
Disallow: /logs/
Disallow: /cdn-cgi/
#
# Tag and archive pages (thin content, low SEO value)
Disallow: /tag/
Disallow: /author/
Disallow: /date/
#
# Search result pages (/?s= is already disallowed above; rules
# are prefix matches, so a trailing "*" variant is not needed)
Disallow: /search/
#
# URL parameters that create duplicate content
Disallow: /*?replytocom=
Disallow: /*?doing_wp_cron
Disallow: /*?ver=
Disallow: /*&ver=

# =============================================================
# SECTION 6 — BLOCK KNOWN MALICIOUS / JUNK CRAWLERS
# NOTE: robots.txt is advisory. Clients that do not read it are
# unaffected by these groups; blocking them for real requires a
# server- or firewall-level rule.
# =============================================================

User-agent: MauiBot
Disallow: /

User-agent: AhrefsBot-BETA
Disallow: /

User-agent: SeznamBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Pinterestbot
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: archive.org_bot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: python-requests
Disallow: /

User-agent: curl
Disallow: /

User-agent: libwww-perl
Disallow: /

User-agent: lwp-trivial
Disallow: /

User-agent: WGet
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: EmailCollector
Disallow: /

User-agent: EmailSiphon
Disallow: /

User-agent: WebBandit
Disallow: /

User-agent: EmailWolf
Disallow: /

# =============================================================
# SECTION 7 — SITEMAPS
# Sitemap records are independent of User-agent groups.
# NOTE(review): confirm each URL below actually resolves; list
# only the sitemap(s) the site really serves.
# =============================================================

Sitemap: https://nepallawsunshine.com/sitemap.xml
Sitemap: https://nepallawsunshine.com/sitemap_index.xml
Sitemap: https://nepallawsunshine.com/wp-sitemap.xml