# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.

# For more information about the robots.txt standard, see:
# https://www.robotstxt.org/robotstxt.html
# https://www.rfc-editor.org/rfc/rfc9309 (Robots Exclusion Protocol)

# Sitemap
# Declared outside any User-agent group: it is a file-wide record, not a
# per-crawler rule.
Sitemap: https://www.cuatrecasas.com/sitemap.xml

# Default rules for every crawler that has no dedicated group in this file.
# The User-agent line and its rules are kept contiguous (no blank lines in
# between) because parsers that follow the original record format treat a
# blank line as the end of the group and would drop the rules.
# Each Allow exception is listed before the broader Disallow it carves out of,
# so first-match parsers and longest-match parsers reach the same result.
User-agent: *
# Directories
Disallow: /admin/
Disallow: /bundles/
Disallow: /bundles_old/
Disallow: /erecruiting/
Allow: /images/cache/
Disallow: /images/
Disallow: /img/
Disallow: /media_repository/
Allow: /summernote/
Allow: /resources/
Allow: /web/assets/
Allow: /web/vendor/
Disallow: /web/

# Files
#Disallow: /calendario.php


# Web_Service
#Disallow: /*noticias/table

# Certain social media sites are whitelisted to allow crawlers to access page
# markup when links to /images are shared.
# NOTE(review): a crawler obeys only the most specific group that matches its
# name, so Twitterbot does not inherit the "User-agent: *" Disallow rules.
# With only an Allow rule in this group, Twitterbot is effectively unrestricted
# (including /admin/). Confirm this is intended, or repeat the required
# Disallow rules inside this group.
User-agent: Twitterbot
Allow: /images/

# --- Rules added for Artificial Intelligence (AI) bots --- 

# OpenAI (crawler for AI training) - Total restriction
# "Disallow: /" matches every path, so nothing on the site may be fetched by
# this user agent.
User-agent: GPTBot
Disallow: /

# Search and AI crawlers allowed under the same restrictions as the default
# ("User-agent: *") group. A crawler named here obeys only this group, so the
# rules below must be kept in sync with the default group by hand.
#   OAI-SearchBot   - ChatGPT Search (OpenAI)
#   ChatGPT-User    - ChatGPT (user-requested searches)
#   PerplexityBot   - Perplexity.ai (AI-powered search engine)
#   Bingbot         - Microsoft Bing (search/AI crawler)
#   Googlebot       - Google Search (search crawler)
#   Google-Extended - Google AI services (Gemini, formerly Bard)
# Descriptions live on their own comment lines rather than after the product
# token, so that simple parsers cannot mistake them for part of the name.
# Each Allow exception is listed before the broader Disallow it carves out of,
# so first-match parsers and longest-match parsers reach the same result.
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: PerplexityBot
User-agent: Bingbot
User-agent: Googlebot
User-agent: Google-Extended
Disallow: /admin/
Disallow: /bundles/
Disallow: /bundles_old/
Disallow: /erecruiting/
Allow: /images/cache/
Disallow: /images/
Disallow: /img/
Disallow: /media_repository/
Allow: /summernote/
Allow: /resources/
Allow: /web/assets/
Allow: /web/vendor/
Disallow: /web/

# End of rules for AI bots