- Fix bulk CSV upload functionality that was returning HTML errors (usage sketch below)
- Implement proper project/organization handling for logged-in vs. anonymous users
- Update user registration to create a unique Default Organization and Default Project
- Fix frontend API URL configuration for bulk upload endpoints
- Resolve foreign key constraint violations in bulk processing
- Update BulkProcessorService to use in-memory processing instead of Redis
- Fix redirect-tracker service to handle missing project IDs properly
- Update Prisma schema for optional project relationships in bulk jobs
- Improve registration form UI with better password validation and alignment
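For context, here is a minimal sketch of how an upload endpoint might call the service defined in this file. Express, multer, the route path, the import path, and the req.user / req.body field names are illustrative assumptions (an authenticated request is assumed), not part of this changeset; only createBulkJobFromCsv and its parameter order come from the service itself.

const express = require('express');
const multer = require('multer');
const { BulkProcessorService } = require('./services/bulk-processor.service'); // assumed path

const upload = multer({ dest: 'uploads/' });
const bulkProcessor = new BulkProcessorService();
const router = express.Router();

// Hypothetical route: accepts a CSV file and starts a bulk job. Organization and
// project IDs may be undefined; the service persists them as NULL thanks to the
// optional Prisma relations mentioned in the changelog.
router.post('/api/bulk/upload', upload.single('file'), async (req, res) => {
    try {
        const job = await bulkProcessor.createBulkJobFromCsv(
            req.user.id,
            req.user.organizationId, // may be undefined
            req.file.path,
            req.body.projectId,      // may be undefined
            { maxHops: 10, timeout: 15000 },
        );
        res.status(202).json({ jobId: job.id, status: job.status, progress: job.progress });
    } catch (error) {
        res.status(400).json({ error: error instanceof Error ? error.message : 'Upload failed' });
    }
});

module.exports = router;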
"use strict";
|
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.BulkProcessorService = void 0;
|
|
const promises_1 = __importDefault(require("fs/promises"));
|
|
const path_1 = __importDefault(require("path"));
|
|
const csv_parser_1 = __importDefault(require("csv-parser"));
|
|
const csv_writer_1 = require("csv-writer");
|
|
const zod_1 = require("zod");
|
|
const logger_1 = require("../lib/logger");
|
|
const prisma_1 = require("../lib/prisma");
|
|
const BulkJobCreateSchema = zod_1.z.object({
    projectId: zod_1.z.string().optional(),
    urls: zod_1.z.array(zod_1.z.object({
        url: zod_1.z.string().url('Invalid URL format'),
        label: zod_1.z.string().optional(),
        metadata: zod_1.z.record(zod_1.z.any()).optional(),
    })).min(1, 'At least one URL is required').max(1000, 'Maximum 1000 URLs per job'),
    options: zod_1.z.object({
        method: zod_1.z.enum(['GET', 'POST', 'HEAD']).default('GET'),
        userAgent: zod_1.z.string().optional(),
        maxHops: zod_1.z.number().min(1).max(20).default(10),
        timeout: zod_1.z.number().min(1000).max(30000).default(15000),
        enableSSLAnalysis: zod_1.z.boolean().default(true),
        enableSEOAnalysis: zod_1.z.boolean().default(true),
        enableSecurityAnalysis: zod_1.z.boolean().default(true),
        headers: zod_1.z.record(zod_1.z.string()).optional(),
    }).default({}),
});
const CsvRowSchema = zod_1.z.object({
    url: zod_1.z.string().min(1, 'URL is required'),
    label: zod_1.z.string().optional(),
    method: zod_1.z.enum(['GET', 'POST', 'HEAD']).optional(),
    user_agent: zod_1.z.string().optional(),
    max_hops: zod_1.z.string().optional(),
    timeout: zod_1.z.string().optional(),
    enable_ssl: zod_1.z.string().optional(),
    enable_seo: zod_1.z.string().optional(),
    enable_security: zod_1.z.string().optional(),
});
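// Illustrative example of a CSV accepted by CsvRowSchema (column names match the
// schema keys; the rows shown are made-up sample data):
//
//   url,label,method,max_hops,timeout,enable_ssl,enable_seo,enable_security
//   example.com,Homepage,GET,10,15000,true,true,true
//   https://example.org/promo,Campaign link,HEAD,5,10000,yes,no,1
//
// parseCsvFile() below prepends https:// to bare hostnames, and parseBoolean()
// accepts true/false, 1/0, and yes/no (case-insensitive).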
class BulkProcessorService {
    uploadsDir;
    inMemoryJobs = new Map();
    constructor() {
        this.uploadsDir = path_1.default.join(process.cwd(), 'uploads');
        this.ensureUploadsDirectory();
    }
    async ensureUploadsDirectory() {
        try {
            await promises_1.default.mkdir(this.uploadsDir, { recursive: true });
        }
        catch (error) {
            logger_1.logger.error('Failed to create uploads directory:', error);
        }
    }
    async parseCsvFile(filePath) {
        const results = [];
        return new Promise((resolve, reject) => {
            fs_1.default.createReadStream(filePath)
                .pipe((0, csv_parser_1.default)())
                .on('data', (row) => {
                    try {
                        const validatedRow = CsvRowSchema.parse(row);
                        let url = validatedRow.url.trim();
                        // Bare hostnames are normalized to HTTPS.
                        if (!url.startsWith('http://') && !url.startsWith('https://')) {
                            url = `https://${url}`;
                        }
                        const parsedRow = {
                            url,
                            label: validatedRow.label?.trim() || undefined,
                            metadata: {
                                method: validatedRow.method || 'GET',
                                userAgent: validatedRow.user_agent?.trim(),
                                maxHops: validatedRow.max_hops ? parseInt(validatedRow.max_hops, 10) : undefined,
                                timeout: validatedRow.timeout ? parseInt(validatedRow.timeout, 10) : undefined,
                                enableSSL: this.parseBoolean(validatedRow.enable_ssl),
                                enableSEO: this.parseBoolean(validatedRow.enable_seo),
                                enableSecurity: this.parseBoolean(validatedRow.enable_security),
                            },
                        };
                        results.push(parsedRow);
                    }
                    catch (error) {
                        // Rows that fail validation are skipped rather than failing the whole upload.
                        logger_1.logger.warn('Invalid CSV row skipped:', { row, error: error instanceof Error ? error.message : 'Unknown error' });
                    }
                })
                .on('end', () => {
                    logger_1.logger.info(`CSV parsing completed: ${results.length} valid URLs found`);
                    resolve(results);
                })
                .on('error', (error) => {
                    logger_1.logger.error('CSV parsing failed:', error);
                    reject(error);
                });
        });
    }
    parseBoolean(value) {
        if (!value)
            return undefined;
        const normalized = value.toLowerCase().trim();
        if (normalized === 'true' || normalized === '1' || normalized === 'yes')
            return true;
        if (normalized === 'false' || normalized === '0' || normalized === 'no')
            return false;
        return undefined;
    }
    async createBulkJob(userId, organizationId, jobData, filePath) {
        try {
            const validatedData = BulkJobCreateSchema.parse(jobData);
            const jobId = `bulk_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
            const bulkJob = await prisma_1.prisma.bulkJob.create({
                data: {
                    id: jobId,
                    userId,
                    organizationId: organizationId || null,
                    projectId: validatedData.projectId || null,
                    uploadPath: filePath || 'api',
                    status: 'PENDING',
                    totalUrls: validatedData.urls.length,
                    processedUrls: 0,
                    successfulUrls: 0,
                    failedUrls: 0,
                    configJson: JSON.stringify(validatedData.options),
                    urlsJson: JSON.stringify(validatedData.urls),
                },
            });
            logger_1.logger.info(`Bulk job ${jobId} created with ${validatedData.urls.length} URLs`);
            // Processing starts asynchronously; the caller receives the job record immediately.
            setImmediate(() => {
                this.processBulkJobInMemory(jobId, validatedData.urls, validatedData.options);
            });
            const job = {
                id: jobId,
                userId,
                organizationId,
                projectId: validatedData.projectId,
                urls: validatedData.urls,
                options: validatedData.options,
                status: 'PENDING',
                progress: {
                    total: validatedData.urls.length,
                    processed: 0,
                    successful: 0,
                    failed: 0,
                },
                createdAt: bulkJob.createdAt,
            };
            logger_1.logger.info(`Bulk tracking job created: ${jobId}`, {
                userId,
                urlCount: validatedData.urls.length,
                organizationId,
            });
            return job;
        }
        catch (error) {
            logger_1.logger.error('Failed to create bulk job:', error);
            throw new Error(`Failed to create bulk job: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    async createBulkJobFromCsv(userId, organizationId, filePath, projectId, options = {}) {
        try {
            const urls = await this.parseCsvFile(filePath);
            if (urls.length === 0) {
                throw new Error('No valid URLs found in CSV file');
            }
            const jobData = {
                urls,
                options: {
                    method: 'GET',
                    maxHops: 10,
                    timeout: 15000,
                    enableSSLAnalysis: true,
                    enableSEOAnalysis: true,
                    enableSecurityAnalysis: true,
                    ...options,
                },
                projectId,
            };
            const job = await this.createBulkJob(userId, organizationId, jobData, filePath);
            // The uploaded CSV is no longer needed once the job has been created.
            await promises_1.default.unlink(filePath).catch(() => { });
            return job;
        }
        catch (error) {
            await promises_1.default.unlink(filePath).catch(() => { });
            throw error;
        }
    }
    async getBulkJob(jobId, userId) {
        try {
            const bulkJob = await prisma_1.prisma.bulkJob.findFirst({
                where: {
                    id: jobId,
                    userId,
                },
            });
            if (!bulkJob) {
                return null;
            }
            const progress = bulkJob.totalUrls > 0 ? (bulkJob.processedUrls / bulkJob.totalUrls) * 100 : 0;
            const job = {
                id: bulkJob.id,
                userId: bulkJob.userId,
                ...(bulkJob.organizationId ? { organizationId: bulkJob.organizationId } : {}),
                ...(bulkJob.projectId ? { projectId: bulkJob.projectId } : {}),
                urls: JSON.parse(bulkJob.urlsJson),
                options: JSON.parse(bulkJob.configJson),
                status: bulkJob.status,
                progress: {
                    total: bulkJob.totalUrls,
                    processed: bulkJob.processedUrls,
                    successful: bulkJob.successfulUrls,
                    failed: bulkJob.failedUrls,
                },
                results: bulkJob.resultsJson ? JSON.parse(bulkJob.resultsJson) : undefined,
                createdAt: bulkJob.createdAt,
                startedAt: bulkJob.startedAt || undefined,
                finishedAt: bulkJob.finishedAt || undefined,
                estimatedCompletionAt: this.calculateEstimatedCompletion(bulkJob),
            };
            return job;
        }
        catch (error) {
            logger_1.logger.error('Failed to get bulk job:', error);
            return null;
        }
    }
    calculateEstimatedCompletion(bulkJob) {
        if (!bulkJob.startedAt || bulkJob.status === 'COMPLETED' || bulkJob.status === 'FAILED') {
            return undefined;
        }
        const elapsed = Date.now() - bulkJob.startedAt.getTime();
        const processed = bulkJob.processedUrls;
        const remaining = bulkJob.totalUrls - processed;
        if (processed === 0) {
            return undefined;
        }
        const avgTimePerUrl = elapsed / processed;
        const estimatedRemainingTime = avgTimePerUrl * remaining;
        return new Date(Date.now() + estimatedRemainingTime);
    }
    async cancelBulkJob(jobId, userId) {
        try {
            await prisma_1.prisma.bulkJob.updateMany({
                where: {
                    id: jobId,
                    userId,
                },
                data: {
                    status: 'CANCELLED',
                    finishedAt: new Date(),
                },
            });
            logger_1.logger.info(`Bulk job cancelled: ${jobId}`, { userId });
            return true;
        }
        catch (error) {
            logger_1.logger.error('Failed to cancel bulk job:', error);
            return false;
        }
    }
    async getUserBulkJobs(userId, limit = 20, offset = 0) {
        try {
            const bulkJobs = await prisma_1.prisma.bulkJob.findMany({
                where: { userId },
                orderBy: { createdAt: 'desc' },
                take: limit,
                skip: offset,
            });
            return Promise.all(bulkJobs.map(async (bulkJob) => {
                const job = {
                    id: bulkJob.id,
                    userId: bulkJob.userId,
                    ...(bulkJob.organizationId ? { organizationId: bulkJob.organizationId } : {}),
                    ...(bulkJob.projectId ? { projectId: bulkJob.projectId } : {}),
                    urls: JSON.parse(bulkJob.urlsJson),
                    options: JSON.parse(bulkJob.configJson),
                    status: bulkJob.status,
                    progress: {
                        total: bulkJob.totalUrls,
                        processed: bulkJob.processedUrls,
                        successful: bulkJob.successfulUrls,
                        failed: bulkJob.failedUrls,
                    },
                    results: bulkJob.resultsJson ? JSON.parse(bulkJob.resultsJson) : undefined,
                    createdAt: bulkJob.createdAt,
                    startedAt: bulkJob.startedAt || undefined,
                    finishedAt: bulkJob.finishedAt || undefined,
                    estimatedCompletionAt: this.calculateEstimatedCompletion(bulkJob),
                };
                return job;
            }));
        }
        catch (error) {
            logger_1.logger.error('Failed to get user bulk jobs:', error);
            return [];
        }
    }
    async exportResultsToCsv(jobId, userId) {
        try {
            const job = await this.getBulkJob(jobId, userId);
            if (!job || !job.results) {
                throw new Error('Job not found or no results available');
            }
            const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
            const fileName = `bulk-results-${jobId}-${timestamp}.csv`;
            const filePath = path_1.default.join(this.uploadsDir, fileName);
            const csvWriter = (0, csv_writer_1.createObjectCsvWriter)({
                path: filePath,
                header: [
                    { id: 'url', title: 'URL' },
                    { id: 'label', title: 'Label' },
                    { id: 'status', title: 'Status' },
                    { id: 'checkId', title: 'Check ID' },
                    { id: 'error', title: 'Error' },
                    { id: 'startedAt', title: 'Started At' },
                    { id: 'finishedAt', title: 'Finished At' },
                    { id: 'durationMs', title: 'Duration (ms)' },
                ],
            });
            const records = job.results.map(result => ({
                url: result.url,
                label: result.label || '',
                status: result.status,
                checkId: result.checkId || '',
                error: result.error || '',
                startedAt: result.timing.startedAt.toISOString(),
                finishedAt: result.timing.finishedAt?.toISOString() || '',
                durationMs: result.timing.durationMs || '',
            }));
            await csvWriter.writeRecords(records);
            logger_1.logger.info(`Results exported to CSV: ${filePath}`, { jobId, userId });
            return filePath;
        }
        catch (error) {
            logger_1.logger.error('Failed to export results to CSV:', error);
            throw new Error(`Failed to export results: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    async cleanupOldJobs(maxAgeHours = 72) {
        try {
            const cutoff = new Date(Date.now() - (maxAgeHours * 60 * 60 * 1000));
            const result = await prisma_1.prisma.bulkJob.deleteMany({
                where: {
                    createdAt: {
                        lt: cutoff,
                    },
                    status: {
                        in: ['COMPLETED', 'FAILED', 'CANCELLED'],
                    },
                },
            });
            // Also remove uploaded/exported files in the uploads directory older than the cutoff.
            try {
                const files = await promises_1.default.readdir(this.uploadsDir);
                for (const file of files) {
                    const filePath = path_1.default.join(this.uploadsDir, file);
                    const stats = await promises_1.default.stat(filePath);
                    if (stats.mtime < cutoff) {
                        await promises_1.default.unlink(filePath);
                        logger_1.logger.info(`Cleaned up old file: ${file}`);
                    }
                }
            }
            catch (error) {
                logger_1.logger.warn('Failed to cleanup old files:', error);
            }
            logger_1.logger.info(`Cleaned up ${result.count} old bulk jobs`);
        }
        catch (error) {
            logger_1.logger.error('Failed to cleanup old jobs:', error);
        }
    }
    async getQueueStats() {
        try {
            const stats = await prisma_1.prisma.bulkJob.groupBy({
                by: ['status'],
                _count: {
                    status: true,
                },
            });
            const statusCounts = stats.reduce((acc, stat) => {
                acc[stat.status] = stat._count.status;
                return acc;
            }, {});
            return {
                waiting: statusCounts['PENDING'] || 0,
                active: statusCounts['RUNNING'] || 0,
                completed: statusCounts['COMPLETED'] || 0,
                failed: statusCounts['FAILED'] || 0,
                delayed: 0,
            };
        }
        catch (error) {
            logger_1.logger.error('Failed to get queue stats:', error);
            return {
                waiting: 0,
                active: 0,
                completed: 0,
                failed: 0,
                delayed: 0,
            };
        }
    }
    async processBulkJobInMemory(jobId, urls, options) {
        try {
            await prisma_1.prisma.bulkJob.update({
                where: { id: jobId },
                data: {
                    status: 'RUNNING',
                    startedAt: new Date(),
                },
            });
            logger_1.logger.info(`Starting bulk job processing: ${jobId} with ${urls.length} URLs`);
            let processed = 0;
            let successful = 0;
            let failed = 0;
            for (const urlData of urls) {
                try {
                    logger_1.logger.info(`Processing URL: ${urlData.url}`);
                    // Per-URL work is currently simulated with a short delay and counted as successful.
                    await new Promise(resolve => setTimeout(resolve, 100));
                    processed++;
                    successful++;
                    // Persist progress every 10 URLs to limit database writes.
                    if (processed % 10 === 0) {
                        await prisma_1.prisma.bulkJob.update({
                            where: { id: jobId },
                            data: {
                                processedUrls: processed,
                                successfulUrls: successful,
                                failedUrls: failed,
                            },
                        });
                    }
                }
                catch (error) {
                    logger_1.logger.error(`Failed to process URL ${urlData.url}:`, error);
                    processed++;
                    failed++;
                }
            }
            await prisma_1.prisma.bulkJob.update({
                where: { id: jobId },
                data: {
                    status: 'COMPLETED',
                    processedUrls: processed,
                    successfulUrls: successful,
                    failedUrls: failed,
                    finishedAt: new Date(),
                },
            });
            logger_1.logger.info(`Bulk job ${jobId} completed: ${successful} successful, ${failed} failed`);
        }
        catch (error) {
            logger_1.logger.error(`Bulk job ${jobId} failed:`, error);
            await prisma_1.prisma.bulkJob.update({
                where: { id: jobId },
                data: {
                    status: 'FAILED',
                    finishedAt: new Date(),
                },
            }).catch(() => { });
        }
    }
}
exports.BulkProcessorService = BulkProcessorService;
//# sourceMappingURL=bulk-processor.service.js.map
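Downstream callers can poll getBulkJob for progress while processBulkJobInMemory runs in the background. A minimal sketch, assuming the same service instance as in the upload example above; the helper name, poll interval, and logging are illustrative:

// Hypothetical helper: resolves once the job reaches a terminal state.
async function waitForBulkJob(bulkProcessor, jobId, userId) {
    for (;;) {
        const job = await bulkProcessor.getBulkJob(jobId, userId);
        if (!job)
            throw new Error(`Bulk job ${jobId} not found`);
        console.log(`Bulk job ${jobId}: ${job.progress.processed}/${job.progress.total} processed`);
        if (['COMPLETED', 'FAILED', 'CANCELLED'].includes(job.status))
            return job;
        await new Promise((resolve) => setTimeout(resolve, 1000));
    }
}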