// maternal-app/maternal-web/lib/security/promptSecurity.ts

/**
 * Prompt Injection Protection
 *
 * Detects and prevents malicious prompt injection attempts in AI inputs
 * to protect against system prompt manipulation, data exfiltration, and
 * jailbreaking attempts.
 */

/**
 * Outcome of validating a user prompt.
 */
export interface PromptValidationResult {
  /** True when the prompt passed every check. */
  isValid: boolean;
  /** Human-readable explanation; populated on the rejection results built in this file. */
  reason?: string;
  /** Cleaned prompt text; populated by validatePrompt on its success result. */
  sanitizedPrompt?: string;
  /** Severity attached to the result. */
  riskLevel: 'low' | 'medium' | 'high';
}

/**
 * Common prompt injection patterns to detect.
 *
 * The patterns are deliberately NOT global (`g`) or sticky (`y`): they are
 * shared module-level objects probed with `RegExp.prototype.test`, and a
 * global regex remembers `lastIndex` between calls. With the `g` flag a
 * prompt that matched once would be searched from the end of the previous
 * match on the next call and slip through undetected.
 */
const INJECTION_PATTERNS: readonly RegExp[] = [
  // System prompt manipulation
  /ignore\s+(previous|above|all|prior)\s+(instructions?|prompts?|commands?)/i,
  /ignore\s+all/i, // Catch "ignore all"
  /disregard\s+(previous|above|all)\s+(instructions?|prompts?|commands?)/i,
  /forget\s+(previous|above|all)\s+(instructions?|prompts?|commands?)/i,
  /new\s+instructions?:/i,
  /system\s+prompt/i, // Catch "system prompt" anywhere
  /you\s+are\s+now/i,
  // Allow parenting roles only. The `\s*` inside the lookahead stops the
  // preceding `\s+` from backtracking past the allow-list when the role is
  // separated by more than one whitespace character.
  /act\s+as\s+a\s+(?!\s*(?:parent|caregiver))/i,

  // Role manipulation
  /pretend\s+to\s+be/i,
  /simulate\s+being/i,
  /roleplay\s+as/i,

  // Data exfiltration attempts
  /show\s+me\s+(your|the)\s+(system|internal|hidden)/i, // Catch "show me your system/internal/hidden"
  /your\s+(system|internal|hidden)\s+prompt/i, // Catch "your system/internal prompt"
  /what\s+(is|are)\s+your\s+(instructions?|rules?|guidelines?)/i,
  /reveal\s+your\s+(system|internal|hidden)/i,
  /list\s+all\s+(users?|children|families)/i,
  /show\s+all\s+data/i,

  // Command injection
  /execute\s+code/i,
  /run\s+command/i,
  /shell\s+command/i,

  // Jailbreak attempts
  /DAN\s+mode/i, // "Do Anything Now"
  /developer\s+mode/i,
  /admin\s+mode/i,
  /sudo\s+mode/i,
  /root\s+access/i,

  // Prompt leaking
  /repeat\s+(the\s+)?above/i,
  /what\s+was\s+your\s+(first|initial|original)/i,
  /before\s+this\s+conversation/i,
];

/**
 * Suspicious character sequences that may indicate encoding attacks.
 *
 * These are presence checks used with `RegExp.prototype.test`, so none of
 * them carries the `g` flag: a global regex keeps `lastIndex` between calls,
 * which would make a repeated check of the same text start past the match
 * and report a false negative.
 */
const SUSPICIOUS_SEQUENCES: readonly RegExp[] = [
  /\u0000/, // Null bytes
  /[\u200B-\u200D\uFEFF]/, // Zero-width characters
  /[\u2060-\u2069]/, // Invisible formatting characters
  /<script/i, // HTML script tags
  /<iframe/i, // HTML iframe tags
  /javascript:/i, // JavaScript protocol
  /data:text\/html/i, // Data URIs
];

/**
 * Maximum allowed lengths to prevent resource exhaustion.
 * All three limits are compared with a strict "greater than" by validatePrompt,
 * so a value exactly equal to the limit is still accepted.
 */
const MAX_PROMPT_LENGTH = 2000; // characters (string length) in the whole prompt
const MAX_LINE_LENGTH = 500; // characters per '\n'-separated line
const MAX_REPEATED_CHARS = 20; // consecutive occurrences of the same character

/**
 * Stateless regex probe.
 *
 * `RegExp.prototype.test` on a global or sticky regex resumes from
 * `lastIndex`, so a shared pattern could miss a match on a later call.
 * Resetting `lastIndex` around the call makes the result depend only on
 * `text`, whatever flags the pattern carries.
 */
function matchesPattern(pattern: RegExp, text: string): boolean {
  pattern.lastIndex = 0;
  const matched = pattern.test(text);
  pattern.lastIndex = 0;
  return matched;
}

/**
 * Validates and sanitizes user prompts for AI queries.
 *
 * Checks run in this order and the first failure is returned:
 *  1. prompt is a non-empty string (low)
 *  2. total length (medium)
 *  3. per-line length (medium)
 *  4. runs of one repeated character (medium)
 *  5. hidden characters / HTML / script sequences (high)
 *  6. known injection phrases, in the raw AND the sanitized text (high)
 *  7. ratio of special characters (medium)
 *  8. nothing left after sanitizing (low)
 *
 * @param prompt - Raw user input.
 * @returns `isValid: true` with `sanitizedPrompt` set when every check
 *   passes, otherwise `isValid: false` with a `reason` and a `riskLevel`.
 */
export function validatePrompt(prompt: string): PromptValidationResult {
  const reject = (
    reason: string,
    riskLevel: PromptValidationResult['riskLevel']
  ): PromptValidationResult => ({ isValid: false, reason, riskLevel });

  if (!prompt || typeof prompt !== 'string') {
    return reject('Prompt must be a non-empty string', 'low');
  }

  // Check length constraints
  if (prompt.length > MAX_PROMPT_LENGTH) {
    return reject(
      `Prompt exceeds maximum length of ${MAX_PROMPT_LENGTH} characters`,
      'medium'
    );
  }

  // Check for excessively long lines (may indicate copy-paste attacks)
  if (prompt.split('\n').some(line => line.length > MAX_LINE_LENGTH)) {
    return reject('Prompt contains excessively long lines', 'medium');
  }

  // Check for suspicious repeated characters
  const repeatedRuns = prompt.match(/(.)\1+/g) ?? [];
  if (repeatedRuns.some(run => run.length > MAX_REPEATED_CHARS)) {
    return reject('Prompt contains suspicious repeated characters', 'medium');
  }

  // Check for suspicious character sequences
  if (SUSPICIOUS_SEQUENCES.some(pattern => matchesPattern(pattern, prompt))) {
    return reject('Prompt contains suspicious or hidden characters', 'high');
  }

  // Check for prompt injection patterns. The sanitized text is checked as
  // well because it is what gets returned to the caller: stripping tags or
  // control characters can re-assemble a phrase that was split in the raw
  // input (e.g. "ig<b>nore all" becomes "ignore all").
  const sanitizedPrompt = sanitizePrompt(prompt);
  const hasInjection = INJECTION_PATTERNS.some(
    pattern =>
      matchesPattern(pattern, prompt) || matchesPattern(pattern, sanitizedPrompt)
  );
  if (hasInjection) {
    return reject('Prompt contains potential injection attempt', 'high');
  }

  // Check for excessive special characters (may indicate encoding attack).
  // Letters, combining marks and digits of any script count as ordinary
  // text, so prompts written in non-Latin alphabets are not rejected.
  // Both counts are in code points so the ratio stays consistent.
  const specialCharCount = (prompt.match(/[^\p{L}\p{M}\p{N}\s.,!?'-]/gu) ?? []).length;
  const specialCharRatio = specialCharCount / Array.from(prompt).length;
  if (specialCharRatio > 0.3) {
    return reject('Prompt contains excessive special characters', 'medium');
  }

  // A prompt that is only whitespace sanitizes to nothing; treat it like an
  // empty prompt instead of reporting success with empty text.
  if (sanitizedPrompt.length === 0) {
    return reject('Prompt must be a non-empty string', 'low');
  }

  return {
    isValid: true,
    sanitizedPrompt,
    riskLevel: 'low',
  };
}

/**
 * Sanitizes prompt by removing potentially dangerous content.
 *
 * Steps, in order:
 *  1. strip HTML tags
 *  2. strip zero-width and invisible formatting characters
 *  3. strip control characters that are not whitespace
 *  4. collapse every whitespace run to one space and trim
 *
 * Whitespace is normalized last so that removing a character which sat
 * between two spaces, or at the start of the text, cannot leave a doubled
 * or leading space behind.
 *
 * @param prompt - Text to clean.
 * @returns The cleaned, single-line text (may be empty).
 */
function sanitizePrompt(prompt: string): string {
  return (
    prompt
      // Remove HTML tags
      .replace(/<[^>]*>/g, '')
      // Remove zero-width and invisible characters
      .replace(/[\u200B-\u200D\uFEFF\u2060-\u2069]/g, '')
      // Remove control characters; tab, newline, vertical tab, form feed and
      // carriage return are left for the whitespace step so they still
      // separate words
      .replace(/[\x00-\x08\x0E-\x1F\x7F]/g, '')
      // Remove excessive whitespace
      .replace(/\s+/g, ' ')
      .trim()
  );
}

/**
 * Lower-case keywords whose presence marks a prompt as parenting-related.
 * Matching is by substring, so inflected forms such as "feeding" or
 * "children" are covered by their stems.
 */
const PARENTING_KEYWORDS: readonly string[] = [
  'baby', 'child', 'toddler', 'infant', 'kid',
  'feed', 'sleep', 'diaper', 'nap', 'bottle',
  'breastfeed', 'formula', 'meal', 'bedtime',
  'cry', 'fussy', 'teething', 'milestone',
  'development', 'growth', 'schedule', 'routine',
  'parent', 'mom', 'dad', 'caregiver',
];

/**
 * Context-aware validation for parenting assistant.
 * Checks if the prompt is appropriate for a parenting/childcare context.
 *
 * Prompts of five words or fewer are always accepted, because short
 * follow-ups rarely repeat a keyword. Longer prompts must contain at least
 * one parenting keyword (case-insensitive substring match).
 *
 * @param prompt - User prompt; surrounding whitespace is ignored.
 * @returns True when the prompt is short or mentions a parenting keyword.
 */
export function isParentingRelated(prompt: string): boolean {
  // Trim first: splitting untrimmed text yields empty tokens at either end,
  // which would inflate the word count and defeat the short-prompt leniency.
  const normalized = prompt.trim().toLowerCase();
  const wordCount = normalized === '' ? 0 : normalized.split(/\s+/).length;

  // For very short prompts, be more lenient
  if (wordCount <= 5) {
    return true;
  }

  // Check if prompt contains parenting-related keywords
  return PARENTING_KEYWORDS.some(keyword => normalized.includes(keyword));
}

/**
 * Rate limiting helper - tracks prompt attempts per session.
 *
 * Sliding-window limiter: each key may record at most `maxAttempts`
 * attempts within any `windowMs` milliseconds. State is kept in memory in
 * this instance only.
 */
class PromptRateLimiter {
  /** Attempt timestamps (ms since epoch) per user, oldest first. */
  private readonly attempts = new Map<string, number[]>();
  /** Time of the last sweep over all users; 0 until the first check. */
  private lastSweep = 0;

  /**
   * @param maxAttempts - Attempts allowed per window (default 5).
   * @param windowMs - Window length in milliseconds (default 60000, 1 minute).
   */
  constructor(
    private readonly maxAttempts: number = 5,
    private readonly windowMs: number = 60000
  ) {}

  /**
   * Check if user has exceeded rate limit for suspicious prompts.
   * An attempt is recorded only when the call is allowed.
   *
   * @param userId - Key identifying the user or session.
   * @returns True when within the limit, false when the limit is exceeded.
   */
  checkRateLimit(userId: string): boolean {
    const now = Date.now();
    this.evictStale(now);

    // Filter out old attempts outside the time window
    const recentAttempts = (this.attempts.get(userId) ?? []).filter(
      time => now - time < this.windowMs
    );

    if (recentAttempts.length >= this.maxAttempts) {
      // Keep only the timestamps that still count toward the limit
      this.attempts.set(userId, recentAttempts);
      return false; // Rate limit exceeded
    }

    // Add new attempt
    recentAttempts.push(now);
    this.attempts.set(userId, recentAttempts);

    return true; // Within rate limit
  }

  /**
   * Clear attempts for a user (e.g., after successful validation)
   */
  clearAttempts(userId: string): void {
    this.attempts.delete(userId);
  }

  /**
   * Drops users whose newest attempt has left the window. Without this an
   * entry would stay in the map forever unless clearAttempts was called for
   * that user. Runs at most once per window so the cost per check stays small.
   */
  private evictStale(now: number): void {
    if (now - this.lastSweep < this.windowMs) {
      return;
    }
    this.lastSweep = now;

    for (const [id, times] of this.attempts) {
      const newest = times[times.length - 1];
      if (newest === undefined || now - newest >= this.windowMs) {
        this.attempts.delete(id);
      }
    }
  }
}

export const promptRateLimiter = new PromptRateLimiter();

/**
 * Logs suspicious prompt attempts for security monitoring.
 *
 * Only a 50-character preview and the length of the prompt are logged, not
 * the full text.
 *
 * @param prompt - The rejected prompt. Non-string values are tolerated and
 *   logged as an empty prompt.
 * @param userId - Identifier of the user; falsy values are logged as 'anonymous'.
 * @param reason - Why the prompt was rejected.
 * @param riskLevel - Severity assigned to the attempt.
 */
export function logSuspiciousPrompt(
  prompt: string,
  userId: string | undefined,
  reason: string,
  riskLevel: string
): void {
  // validateAIPrompt forwards the original prompt here even when
  // validatePrompt rejected it for not being a string, so the value can be
  // null/undefined/non-string at runtime. Logging must never throw.
  const text = typeof prompt === 'string' ? prompt : '';

  // In production, this should send to your security monitoring system
  console.warn('[SECURITY] Suspicious prompt detected:', {
    userId: userId || 'anonymous',
    reason,
    riskLevel,
    promptLength: text.length,
    timestamp: new Date().toISOString(),
    // Don't log the full prompt to avoid storing malicious content
    promptPreview: text.substring(0, 50) + '...',
  });

  // TODO: In production, send to Sentry or security monitoring service
  // if (process.env.NODE_ENV === 'production') {
  //   Sentry.captureMessage('Suspicious prompt attempt', {
  //     level: 'warning',
  //     tags: { riskLevel },
  //     extra: { userId, reason, promptLength: prompt.length },
  //   });
  // }
}

/**
 * Complete validation pipeline for AI prompts.
 *
 * Runs validatePrompt first. A rejected prompt is logged and, for a known
 * user with a high-risk result, counted against the suspicious-prompt rate
 * limit. An accepted prompt that does not look parenting-related is still
 * allowed but flagged as medium risk, and the user's rate-limit history is
 * cleared.
 *
 * @param prompt - Raw user input.
 * @param userId - Optional user identifier used for logging and rate limiting.
 * @returns The validation result, or a rate-limit rejection.
 */
export function validateAIPrompt(
  prompt: string,
  userId?: string
): PromptValidationResult {
  const outcome = validatePrompt(prompt);

  // Accepted prompt: apply the context check and reset the limiter
  if (outcome.isValid) {
    // Off-topic questions are allowed, only marked, to avoid false positives
    if (!isParentingRelated(prompt)) {
      outcome.riskLevel = 'medium';
    }

    if (userId) {
      promptRateLimiter.clearAttempts(userId);
    }

    return outcome;
  }

  // Rejected prompt: record it for security monitoring
  logSuspiciousPrompt(
    prompt,
    userId,
    outcome.reason || 'Unknown',
    outcome.riskLevel
  );

  // Only high-risk attempts from an identified user count toward the limit
  const overLimit =
    userId &&
    outcome.riskLevel === 'high' &&
    !promptRateLimiter.checkRateLimit(userId);

  if (overLimit) {
    return {
      isValid: false,
      reason: 'Too many suspicious prompts. Please try again later.',
      riskLevel: 'high',
    };
  }

  return outcome;
}