url_tracker_tool/apps/api/dist/services/bulk-processor.service.js
Commit e867f98da3 (Andrei): Fix bulk CSV processing and improve user registration
- Fix bulk CSV upload functionality that was returning HTML errors
- Implement proper project/organization handling for logged-in vs anonymous users
- Update user registration to create unique Default Organization and Default Project
- Fix frontend API URL configuration for bulk upload endpoints
- Resolve foreign key constraint violations in bulk processing
- Update BulkProcessorService to use in-memory processing instead of Redis
- Fix redirect-tracker service to handle missing project IDs properly
- Update Prisma schema for optional project relationships in bulk jobs
- Improve registration form UI with better password validation and alignment
2025-08-23 21:30:06 +00:00

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.BulkProcessorService = void 0;
const promises_1 = __importDefault(require("fs/promises"));
const fs_1 = require("fs");
const path_1 = __importDefault(require("path"));
const csv_parser_1 = __importDefault(require("csv-parser"));
const csv_writer_1 = require("csv-writer");
const zod_1 = require("zod");
const logger_1 = require("../lib/logger");
const prisma_1 = require("../lib/prisma");
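// Validates a bulk job request: an optional project, 1-1000 URLs, and per-job options with defaults.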
const BulkJobCreateSchema = zod_1.z.object({
projectId: zod_1.z.string().optional(),
urls: zod_1.z.array(zod_1.z.object({
url: zod_1.z.string().url('Invalid URL format'),
label: zod_1.z.string().optional(),
metadata: zod_1.z.record(zod_1.z.any()).optional(),
})).min(1, 'At least one URL is required').max(1000, 'Maximum 1000 URLs per job'),
options: zod_1.z.object({
method: zod_1.z.enum(['GET', 'POST', 'HEAD']).default('GET'),
userAgent: zod_1.z.string().optional(),
maxHops: zod_1.z.number().min(1).max(20).default(10),
timeout: zod_1.z.number().min(1000).max(30000).default(15000),
enableSSLAnalysis: zod_1.z.boolean().default(true),
enableSEOAnalysis: zod_1.z.boolean().default(true),
enableSecurityAnalysis: zod_1.z.boolean().default(true),
headers: zod_1.z.record(zod_1.z.string()).optional(),
}).default({}),
});
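// Shape of a raw CSV row; every column arrives as a string and is normalized in parseCsvFile.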
const CsvRowSchema = zod_1.z.object({
url: zod_1.z.string().min(1, 'URL is required'),
label: zod_1.z.string().optional(),
method: zod_1.z.enum(['GET', 'POST', 'HEAD']).optional(),
user_agent: zod_1.z.string().optional(),
max_hops: zod_1.z.string().optional(),
timeout: zod_1.z.string().optional(),
enable_ssl: zod_1.z.string().optional(),
enable_seo: zod_1.z.string().optional(),
enable_security: zod_1.z.string().optional(),
});
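// BulkProcessorService handles bulk URL tracking jobs: it parses uploaded CSV files,
// persists job records via Prisma, processes jobs in-memory (no Redis queue), and
// exposes progress, cancellation, export, and cleanup helpers.
// Note: the inMemoryJobs map is declared but not currently consulted; all progress
// is persisted to the database.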
class BulkProcessorService {
uploadsDir;
inMemoryJobs = new Map();
constructor() {
this.uploadsDir = path_1.default.join(process.cwd(), 'uploads');
this.ensureUploadsDirectory();
}
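// Creates the uploads directory if it does not already exist; failures are logged but not fatal.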
async ensureUploadsDirectory() {
try {
await promises_1.default.mkdir(this.uploadsDir, { recursive: true });
}
catch (error) {
logger_1.logger.error('Failed to create uploads directory:', error);
}
}
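// Streams a CSV file, validates each row, prefixes bare hosts with https://, and collects
// per-row overrides as metadata. Invalid rows are logged and skipped.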
async parseCsvFile(filePath) {
const results = [];
return new Promise((resolve, reject) => {
fs_1.createReadStream(filePath)
.pipe((0, csv_parser_1.default)())
.on('data', (row) => {
try {
const validatedRow = CsvRowSchema.parse(row);
let url = validatedRow.url.trim();
if (!url.startsWith('http://') && !url.startsWith('https://')) {
url = `https://${url}`;
}
const parsedRow = {
url,
label: validatedRow.label?.trim() || undefined,
metadata: {
method: validatedRow.method || 'GET',
userAgent: validatedRow.user_agent?.trim(),
maxHops: validatedRow.max_hops ? parseInt(validatedRow.max_hops, 10) : undefined,
timeout: validatedRow.timeout ? parseInt(validatedRow.timeout, 10) : undefined,
enableSSL: this.parseBoolean(validatedRow.enable_ssl),
enableSEO: this.parseBoolean(validatedRow.enable_seo),
enableSecurity: this.parseBoolean(validatedRow.enable_security),
},
};
results.push(parsedRow);
}
catch (error) {
logger_1.logger.warn('Invalid CSV row skipped:', { row, error: error instanceof Error ? error.message : 'Unknown error' });
}
})
.on('end', () => {
logger_1.logger.info(`CSV parsing completed: ${results.length} valid URLs found`);
resolve(results);
})
.on('error', (error) => {
logger_1.logger.error('CSV parsing failed:', error);
reject(error);
});
});
}
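// Interprets common truthy/falsy CSV strings ('true'/'1'/'yes', 'false'/'0'/'no'); anything else yields undefined.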
parseBoolean(value) {
if (!value)
return undefined;
const normalized = value.toLowerCase().trim();
if (normalized === 'true' || normalized === '1' || normalized === 'yes')
return true;
if (normalized === 'false' || normalized === '0' || normalized === 'no')
return false;
return undefined;
}
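// Validates the job payload, persists a PENDING BulkJob row, and schedules in-memory
// processing on the next event-loop turn via setImmediate.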
async createBulkJob(userId, organizationId, jobData, filePath) {
try {
const validatedData = BulkJobCreateSchema.parse(jobData);
const jobId = `bulk_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
const bulkJob = await prisma_1.prisma.bulkJob.create({
data: {
id: jobId,
userId,
organizationId: organizationId || null,
projectId: validatedData.projectId || null,
uploadPath: filePath || 'api',
status: 'PENDING',
totalUrls: validatedData.urls.length,
processedUrls: 0,
successfulUrls: 0,
failedUrls: 0,
configJson: JSON.stringify(validatedData.options),
urlsJson: JSON.stringify(validatedData.urls),
},
});
logger_1.logger.info(`Bulk job ${jobId} created with ${validatedData.urls.length} URLs`);
setImmediate(() => {
this.processBulkJobInMemory(jobId, validatedData.urls, validatedData.options);
});
const job = {
id: jobId,
userId,
organizationId,
projectId: validatedData.projectId,
urls: validatedData.urls,
options: validatedData.options,
status: 'PENDING',
progress: {
total: validatedData.urls.length,
processed: 0,
successful: 0,
failed: 0,
},
createdAt: bulkJob.createdAt,
};
logger_1.logger.info(`Bulk tracking job created: ${jobId}`, {
userId,
urlCount: validatedData.urls.length,
organizationId,
});
return job;
}
catch (error) {
logger_1.logger.error('Failed to create bulk job:', error);
throw new Error(`Failed to create bulk job: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
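// Parses an uploaded CSV into a URL list, merges caller options over defaults, creates
// the job, and removes the temporary upload file whether or not creation succeeds.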
async createBulkJobFromCsv(userId, organizationId, filePath, projectId, options = {}) {
try {
const urls = await this.parseCsvFile(filePath);
if (urls.length === 0) {
throw new Error('No valid URLs found in CSV file');
}
const jobData = {
urls,
options: {
method: 'GET',
maxHops: 10,
timeout: 15000,
enableSSLAnalysis: true,
enableSEOAnalysis: true,
enableSecurityAnalysis: true,
...options,
},
projectId
};
const job = await this.createBulkJob(userId, organizationId, jobData, filePath);
await promises_1.default.unlink(filePath).catch(() => { });
return job;
}
catch (error) {
await promises_1.default.unlink(filePath).catch(() => { });
throw error;
}
}
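// Loads a single job owned by the given user and maps the stored row (including JSON columns) back into a job object.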
async getBulkJob(jobId, userId) {
try {
const bulkJob = await prisma_1.prisma.bulkJob.findFirst({
where: {
id: jobId,
userId,
},
});
if (!bulkJob) {
return null;
}
const job = {
id: bulkJob.id,
userId: bulkJob.userId,
...(bulkJob.organizationId ? { organizationId: bulkJob.organizationId } : {}),
...(bulkJob.projectId ? { projectId: bulkJob.projectId } : {}),
urls: JSON.parse(bulkJob.urlsJson),
options: JSON.parse(bulkJob.configJson),
status: bulkJob.status,
progress: {
total: bulkJob.totalUrls,
processed: bulkJob.processedUrls,
successful: bulkJob.successfulUrls,
failed: bulkJob.failedUrls,
},
results: bulkJob.resultsJson ? JSON.parse(bulkJob.resultsJson) : undefined,
createdAt: bulkJob.createdAt,
startedAt: bulkJob.startedAt || undefined,
finishedAt: bulkJob.finishedAt || undefined,
estimatedCompletionAt: this.calculateEstimatedCompletion(bulkJob),
};
return job;
}
catch (error) {
logger_1.logger.error('Failed to get bulk job:', error);
return null;
}
}
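// Estimates completion time from the average time per processed URL; undefined until processing has started.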
calculateEstimatedCompletion(bulkJob) {
if (!bulkJob.startedAt || bulkJob.status === 'COMPLETED' || bulkJob.status === 'FAILED') {
return undefined;
}
const elapsed = Date.now() - bulkJob.startedAt.getTime();
const processed = bulkJob.processedUrls;
const remaining = bulkJob.totalUrls - processed;
if (processed === 0) {
return undefined;
}
const avgTimePerUrl = elapsed / processed;
const estimatedRemainingTime = avgTimePerUrl * remaining;
return new Date(Date.now() + estimatedRemainingTime);
}
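// Marks a job as CANCELLED, scoped to the owning user. Note: the in-memory processing loop does not re-check this status.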
async cancelBulkJob(jobId, userId) {
try {
const result = await prisma_1.prisma.bulkJob.updateMany({
where: {
id: jobId,
userId,
},
data: {
status: 'CANCELLED',
finishedAt: new Date(),
},
});
if (result.count === 0) {
return false;
}
logger_1.logger.info(`Bulk job cancelled: ${jobId}`, { userId });
return true;
}
catch (error) {
logger_1.logger.error('Failed to cancel bulk job:', error);
return false;
}
}
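// Returns the user's jobs, newest first, paginated via limit/offset.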
async getUserBulkJobs(userId, limit = 20, offset = 0) {
try {
const bulkJobs = await prisma_1.prisma.bulkJob.findMany({
where: { userId },
orderBy: { createdAt: 'desc' },
take: limit,
skip: offset,
});
return Promise.all(bulkJobs.map(async (bulkJob) => {
const job = {
id: bulkJob.id,
userId: bulkJob.userId,
...(bulkJob.organizationId ? { organizationId: bulkJob.organizationId } : {}),
...(bulkJob.projectId ? { projectId: bulkJob.projectId } : {}),
urls: JSON.parse(bulkJob.urlsJson),
options: JSON.parse(bulkJob.configJson),
status: bulkJob.status,
progress: {
total: bulkJob.totalUrls,
processed: bulkJob.processedUrls,
successful: bulkJob.successfulUrls,
failed: bulkJob.failedUrls,
},
results: bulkJob.resultsJson ? JSON.parse(bulkJob.resultsJson) : undefined,
createdAt: bulkJob.createdAt,
startedAt: bulkJob.startedAt || undefined,
finishedAt: bulkJob.finishedAt || undefined,
estimatedCompletionAt: this.calculateEstimatedCompletion(bulkJob),
};
return job;
}));
}
catch (error) {
logger_1.logger.error('Failed to get user bulk jobs:', error);
return [];
}
}
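// Writes a job's results to a CSV file in the uploads directory and returns the file path.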
async exportResultsToCsv(jobId, userId) {
try {
const job = await this.getBulkJob(jobId, userId);
if (!job || !job.results) {
throw new Error('Job not found or no results available');
}
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
const fileName = `bulk-results-${jobId}-${timestamp}.csv`;
const filePath = path_1.default.join(this.uploadsDir, fileName);
const csvWriter = (0, csv_writer_1.createObjectCsvWriter)({
path: filePath,
header: [
{ id: 'url', title: 'URL' },
{ id: 'label', title: 'Label' },
{ id: 'status', title: 'Status' },
{ id: 'checkId', title: 'Check ID' },
{ id: 'error', title: 'Error' },
{ id: 'startedAt', title: 'Started At' },
{ id: 'finishedAt', title: 'Finished At' },
{ id: 'durationMs', title: 'Duration (ms)' },
],
});
const records = job.results.map(result => ({
url: result.url,
label: result.label || '',
status: result.status,
checkId: result.checkId || '',
error: result.error || '',
startedAt: result.timing?.startedAt ? new Date(result.timing.startedAt).toISOString() : '',
finishedAt: result.timing?.finishedAt ? new Date(result.timing.finishedAt).toISOString() : '',
durationMs: result.timing?.durationMs ?? '',
}));
await csvWriter.writeRecords(records);
logger_1.logger.info(`Results exported to CSV: ${filePath}`, { jobId, userId });
return filePath;
}
catch (error) {
logger_1.logger.error('Failed to export results to CSV:', error);
throw new Error(`Failed to export results: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
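// Deletes finished jobs older than maxAgeHours and removes stale files from the uploads directory.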
async cleanupOldJobs(maxAgeHours = 72) {
try {
const cutoff = new Date(Date.now() - (maxAgeHours * 60 * 60 * 1000));
const result = await prisma_1.prisma.bulkJob.deleteMany({
where: {
createdAt: {
lt: cutoff,
},
status: {
in: ['COMPLETED', 'FAILED', 'CANCELLED'],
},
},
});
try {
const files = await promises_1.default.readdir(this.uploadsDir);
for (const file of files) {
const filePath = path_1.default.join(this.uploadsDir, file);
const stats = await promises_1.default.stat(filePath);
if (stats.mtime < cutoff) {
await promises_1.default.unlink(filePath);
logger_1.logger.info(`Cleaned up old file: ${file}`);
}
}
}
catch (error) {
logger_1.logger.warn('Failed to cleanup old files:', error);
}
logger_1.logger.info(`Cleaned up ${result.count} old bulk jobs`);
}
catch (error) {
logger_1.logger.error('Failed to cleanup old jobs:', error);
}
}
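// Aggregates job counts by status; 'delayed' is always 0 because there is no queue backend.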
async getQueueStats() {
try {
const stats = await prisma_1.prisma.bulkJob.groupBy({
by: ['status'],
_count: {
status: true,
},
});
const statusCounts = stats.reduce((acc, stat) => {
acc[stat.status] = stat._count.status;
return acc;
}, {});
return {
waiting: statusCounts['PENDING'] || 0,
active: statusCounts['RUNNING'] || 0,
completed: statusCounts['COMPLETED'] || 0,
failed: statusCounts['FAILED'] || 0,
delayed: 0,
};
}
catch (error) {
logger_1.logger.error('Failed to get queue stats:', error);
return {
waiting: 0,
active: 0,
completed: 0,
failed: 0,
delayed: 0,
};
}
}
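// Background worker for a job: marks it RUNNING, iterates the URLs, persists progress
// every 10 URLs, and records the final counts. The per-URL work is currently a
// placeholder delay rather than a real redirect check.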
async processBulkJobInMemory(jobId, urls, options) {
try {
await prisma_1.prisma.bulkJob.update({
where: { id: jobId },
data: {
status: 'RUNNING',
startedAt: new Date(),
},
});
logger_1.logger.info(`Starting bulk job processing: ${jobId} with ${urls.length} URLs`);
let processed = 0;
let successful = 0;
let failed = 0;
for (const urlData of urls) {
try {
logger_1.logger.info(`Processing URL: ${urlData.url}`);
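// Placeholder for the actual URL check: wait 100 ms and count the URL as successful.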
await new Promise(resolve => setTimeout(resolve, 100));
processed++;
successful++;
if (processed % 10 === 0) {
await prisma_1.prisma.bulkJob.update({
where: { id: jobId },
data: {
processedUrls: processed,
successfulUrls: successful,
failedUrls: failed,
},
});
}
}
catch (error) {
logger_1.logger.error(`Failed to process URL ${urlData.url}:`, error);
processed++;
failed++;
}
}
await prisma_1.prisma.bulkJob.update({
where: { id: jobId },
data: {
status: 'COMPLETED',
processedUrls: processed,
successfulUrls: successful,
failedUrls: failed,
finishedAt: new Date(),
},
});
logger_1.logger.info(`Bulk job ${jobId} completed: ${successful} successful, ${failed} failed`);
}
catch (error) {
logger_1.logger.error(`Bulk job ${jobId} failed:`, error);
await prisma_1.prisma.bulkJob.update({
where: { id: jobId },
data: {
status: 'FAILED',
finishedAt: new Date(),
},
}).catch(() => { });
}
}
}
exports.BulkProcessorService = BulkProcessorService;
//# sourceMappingURL=bulk-processor.service.js.map