"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.BulkProcessorService = void 0; const promises_1 = __importDefault(require("fs/promises")); const path_1 = __importDefault(require("path")); const csv_parser_1 = __importDefault(require("csv-parser")); const csv_writer_1 = require("csv-writer"); const zod_1 = require("zod"); const logger_1 = require("../lib/logger"); const prisma_1 = require("../lib/prisma"); const BulkJobCreateSchema = zod_1.z.object({ projectId: zod_1.z.string().optional(), urls: zod_1.z.array(zod_1.z.object({ url: zod_1.z.string().url('Invalid URL format'), label: zod_1.z.string().optional(), metadata: zod_1.z.record(zod_1.z.any()).optional(), })).min(1, 'At least one URL is required').max(1000, 'Maximum 1000 URLs per job'), options: zod_1.z.object({ method: zod_1.z.enum(['GET', 'POST', 'HEAD']).default('GET'), userAgent: zod_1.z.string().optional(), maxHops: zod_1.z.number().min(1).max(20).default(10), timeout: zod_1.z.number().min(1000).max(30000).default(15000), enableSSLAnalysis: zod_1.z.boolean().default(true), enableSEOAnalysis: zod_1.z.boolean().default(true), enableSecurityAnalysis: zod_1.z.boolean().default(true), headers: zod_1.z.record(zod_1.z.string()).optional(), }).default({}), }); const CsvRowSchema = zod_1.z.object({ url: zod_1.z.string().min(1, 'URL is required'), label: zod_1.z.string().optional(), method: zod_1.z.enum(['GET', 'POST', 'HEAD']).optional(), user_agent: zod_1.z.string().optional(), max_hops: zod_1.z.string().optional(), timeout: zod_1.z.string().optional(), enable_ssl: zod_1.z.string().optional(), enable_seo: zod_1.z.string().optional(), enable_security: zod_1.z.string().optional(), }); class BulkProcessorService { uploadsDir; inMemoryJobs = new Map(); constructor() { this.uploadsDir = path_1.default.join(process.cwd(), 'uploads'); this.ensureUploadsDirectory(); } async ensureUploadsDirectory() { try { await promises_1.default.mkdir(this.uploadsDir, { recursive: true }); } catch (error) { logger_1.logger.error('Failed to create uploads directory:', error); } } async parseCsvFile(filePath) { const results = []; return new Promise((resolve, reject) => { const stream = require('fs').createReadStream(filePath) .pipe((0, csv_parser_1.default)()) .on('data', (row) => { try { const validatedRow = CsvRowSchema.parse(row); let url = validatedRow.url.trim(); if (!url.startsWith('http://') && !url.startsWith('https://')) { url = `https://${url}`; } const parsedRow = { url, label: validatedRow.label?.trim() || undefined, metadata: { method: validatedRow.method || 'GET', userAgent: validatedRow.user_agent?.trim(), maxHops: validatedRow.max_hops ? parseInt(validatedRow.max_hops) : undefined, timeout: validatedRow.timeout ? parseInt(validatedRow.timeout) : undefined, enableSSL: this.parseBoolean(validatedRow.enable_ssl), enableSEO: this.parseBoolean(validatedRow.enable_seo), enableSecurity: this.parseBoolean(validatedRow.enable_security), }, }; results.push(parsedRow); } catch (error) { logger_1.logger.warn('Invalid CSV row skipped:', { row, error: error instanceof Error ? error.message : 'Unknown error' }); } }) .on('end', () => { logger_1.logger.info(`CSV parsing completed: ${results.length} valid URLs found`); resolve(results); }) .on('error', (error) => { logger_1.logger.error('CSV parsing failed:', error); reject(error); }); }); } parseBoolean(value) { if (!value) return undefined; const normalized = value.toLowerCase().trim(); if (normalized === 'true' || normalized === '1' || normalized === 'yes') return true; if (normalized === 'false' || normalized === '0' || normalized === 'no') return false; return undefined; } async createBulkJob(userId, organizationId, jobData, filePath) { try { const validatedData = BulkJobCreateSchema.parse(jobData); const jobId = `bulk_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; const bulkJob = await prisma_1.prisma.bulkJob.create({ data: { id: jobId, userId, organizationId: organizationId || null, projectId: validatedData.projectId || null, uploadPath: filePath || 'api', status: 'PENDING', totalUrls: validatedData.urls.length, processedUrls: 0, successfulUrls: 0, failedUrls: 0, configJson: JSON.stringify(validatedData.options), urlsJson: JSON.stringify(validatedData.urls), }, }); logger_1.logger.info(`Bulk job ${jobId} created with ${validatedData.urls.length} URLs`); setImmediate(() => { this.processBulkJobInMemory(jobId, validatedData.urls, validatedData.options); }); const job = { id: jobId, userId, organizationId, projectId: validatedData.projectId, urls: validatedData.urls, options: validatedData.options, status: 'PENDING', progress: { total: validatedData.urls.length, processed: 0, successful: 0, failed: 0, }, createdAt: bulkJob.createdAt, }; logger_1.logger.info(`Bulk tracking job created: ${jobId}`, { userId, urlCount: validatedData.urls.length, organizationId, }); return job; } catch (error) { logger_1.logger.error('Failed to create bulk job:', error); throw new Error(`Failed to create bulk job: ${error instanceof Error ? error.message : 'Unknown error'}`); } } async createBulkJobFromCsv(userId, organizationId, filePath, projectId, options = {}) { try { const urls = await this.parseCsvFile(filePath); if (urls.length === 0) { throw new Error('No valid URLs found in CSV file'); } const jobData = { urls, options: { method: 'GET', maxHops: 10, timeout: 15000, enableSSLAnalysis: true, enableSEOAnalysis: true, enableSecurityAnalysis: true, ...options, }, projectId }; const job = await this.createBulkJob(userId, organizationId, jobData, filePath); await promises_1.default.unlink(filePath).catch(() => { }); return job; } catch (error) { await promises_1.default.unlink(filePath).catch(() => { }); throw error; } } async getBulkJob(jobId, userId) { try { const bulkJob = await prisma_1.prisma.bulkJob.findFirst({ where: { id: jobId, userId, }, }); if (!bulkJob) { return null; } const progress = bulkJob.totalUrls > 0 ? (bulkJob.processedUrls / bulkJob.totalUrls) * 100 : 0; const job = { id: bulkJob.id, userId: bulkJob.userId, ...(bulkJob.organizationId ? { organizationId: bulkJob.organizationId } : {}), ...(bulkJob.projectId ? { projectId: bulkJob.projectId } : {}), urls: JSON.parse(bulkJob.urlsJson), options: JSON.parse(bulkJob.configJson), status: bulkJob.status, progress: { total: bulkJob.totalUrls, processed: bulkJob.processedUrls, successful: bulkJob.successfulUrls, failed: bulkJob.failedUrls, }, results: bulkJob.resultsJson ? JSON.parse(bulkJob.resultsJson) : undefined, createdAt: bulkJob.createdAt, startedAt: bulkJob.startedAt || undefined, finishedAt: bulkJob.finishedAt || undefined, estimatedCompletionAt: this.calculateEstimatedCompletion(bulkJob), }; return job; } catch (error) { logger_1.logger.error('Failed to get bulk job:', error); return null; } } calculateEstimatedCompletion(bulkJob) { if (!bulkJob.startedAt || bulkJob.status === 'COMPLETED' || bulkJob.status === 'FAILED') { return undefined; } const elapsed = Date.now() - bulkJob.startedAt.getTime(); const processed = bulkJob.processedUrls; const remaining = bulkJob.totalUrls - processed; if (processed === 0) { return undefined; } const avgTimePerUrl = elapsed / processed; const estimatedRemainingTime = avgTimePerUrl * remaining; return new Date(Date.now() + estimatedRemainingTime); } async cancelBulkJob(jobId, userId) { try { await prisma_1.prisma.bulkJob.updateMany({ where: { id: jobId, userId, }, data: { status: 'CANCELLED', finishedAt: new Date(), }, }); logger_1.logger.info(`Bulk job cancelled: ${jobId}`, { userId }); return true; } catch (error) { logger_1.logger.error('Failed to cancel bulk job:', error); return false; } } async getUserBulkJobs(userId, limit = 20, offset = 0) { try { const bulkJobs = await prisma_1.prisma.bulkJob.findMany({ where: { userId }, orderBy: { createdAt: 'desc' }, take: limit, skip: offset, }); return Promise.all(bulkJobs.map(async (bulkJob) => { const job = { id: bulkJob.id, userId: bulkJob.userId, ...(bulkJob.organizationId ? { organizationId: bulkJob.organizationId } : {}), ...(bulkJob.projectId ? { projectId: bulkJob.projectId } : {}), urls: JSON.parse(bulkJob.urlsJson), options: JSON.parse(bulkJob.configJson), status: bulkJob.status, progress: { total: bulkJob.totalUrls, processed: bulkJob.processedUrls, successful: bulkJob.successfulUrls, failed: bulkJob.failedUrls, }, results: bulkJob.resultsJson ? JSON.parse(bulkJob.resultsJson) : undefined, createdAt: bulkJob.createdAt, startedAt: bulkJob.startedAt || undefined, finishedAt: bulkJob.finishedAt || undefined, estimatedCompletionAt: this.calculateEstimatedCompletion(bulkJob), }; return job; })); } catch (error) { logger_1.logger.error('Failed to get user bulk jobs:', error); return []; } } async exportResultsToCsv(jobId, userId) { try { const job = await this.getBulkJob(jobId, userId); if (!job || !job.results) { throw new Error('Job not found or no results available'); } const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); const fileName = `bulk-results-${jobId}-${timestamp}.csv`; const filePath = path_1.default.join(this.uploadsDir, fileName); const csvWriter = (0, csv_writer_1.createObjectCsvWriter)({ path: filePath, header: [ { id: 'url', title: 'URL' }, { id: 'label', title: 'Label' }, { id: 'status', title: 'Status' }, { id: 'checkId', title: 'Check ID' }, { id: 'error', title: 'Error' }, { id: 'startedAt', title: 'Started At' }, { id: 'finishedAt', title: 'Finished At' }, { id: 'durationMs', title: 'Duration (ms)' }, ], }); const records = job.results.map(result => ({ url: result.url, label: result.label || '', status: result.status, checkId: result.checkId || '', error: result.error || '', startedAt: result.timing.startedAt.toISOString(), finishedAt: result.timing.finishedAt?.toISOString() || '', durationMs: result.timing.durationMs || '', })); await csvWriter.writeRecords(records); logger_1.logger.info(`Results exported to CSV: ${filePath}`, { jobId, userId }); return filePath; } catch (error) { logger_1.logger.error('Failed to export results to CSV:', error); throw new Error(`Failed to export results: ${error instanceof Error ? error.message : 'Unknown error'}`); } } async cleanupOldJobs(maxAgeHours = 72) { try { const cutoff = new Date(Date.now() - (maxAgeHours * 60 * 60 * 1000)); const result = await prisma_1.prisma.bulkJob.deleteMany({ where: { createdAt: { lt: cutoff, }, status: { in: ['COMPLETED', 'FAILED', 'CANCELLED'], }, }, }); try { const files = await promises_1.default.readdir(this.uploadsDir); for (const file of files) { const filePath = path_1.default.join(this.uploadsDir, file); const stats = await promises_1.default.stat(filePath); if (stats.mtime < cutoff) { await promises_1.default.unlink(filePath); logger_1.logger.info(`Cleaned up old file: ${file}`); } } } catch (error) { logger_1.logger.warn('Failed to cleanup old files:', error); } logger_1.logger.info(`Cleaned up ${result.count} old bulk jobs`); } catch (error) { logger_1.logger.error('Failed to cleanup old jobs:', error); } } async getQueueStats() { try { const stats = await prisma_1.prisma.bulkJob.groupBy({ by: ['status'], _count: { status: true, }, }); const statusCounts = stats.reduce((acc, stat) => { acc[stat.status] = stat._count.status; return acc; }, {}); return { waiting: statusCounts['PENDING'] || 0, active: statusCounts['RUNNING'] || 0, completed: statusCounts['COMPLETED'] || 0, failed: statusCounts['FAILED'] || 0, delayed: 0, }; } catch (error) { logger_1.logger.error('Failed to get queue stats:', error); return { waiting: 0, active: 0, completed: 0, failed: 0, delayed: 0, }; } } async processBulkJobInMemory(jobId, urls, options) { try { await prisma_1.prisma.bulkJob.update({ where: { id: jobId }, data: { status: 'RUNNING', startedAt: new Date(), }, }); logger_1.logger.info(`Starting bulk job processing: ${jobId} with ${urls.length} URLs`); let processed = 0; let successful = 0; let failed = 0; for (const urlData of urls) { try { logger_1.logger.info(`Processing URL: ${urlData.url}`); await new Promise(resolve => setTimeout(resolve, 100)); processed++; successful++; if (processed % 10 === 0) { await prisma_1.prisma.bulkJob.update({ where: { id: jobId }, data: { processedUrls: processed, successfulUrls: successful, failedUrls: failed, }, }); } } catch (error) { logger_1.logger.error(`Failed to process URL ${urlData.url}:`, error); processed++; failed++; } } await prisma_1.prisma.bulkJob.update({ where: { id: jobId }, data: { status: 'COMPLETED', processedUrls: processed, successfulUrls: successful, failedUrls: failed, finishedAt: new Date(), }, }); logger_1.logger.info(`Bulk job ${jobId} completed: ${successful} successful, ${failed} failed`); } catch (error) { logger_1.logger.error(`Bulk job ${jobId} failed:`, error); await prisma_1.prisma.bulkJob.update({ where: { id: jobId }, data: { status: 'FAILED', finishedAt: new Date(), }, }).catch(() => { }); } } } exports.BulkProcessorService = BulkProcessorService; //# sourceMappingURL=bulk-processor.service.js.map