fix(scheduler): implement deadline-based matching with 5-run limit and fix security issues

Security fixes:
- Replace $queryRawUnsafe with parameterized $queryRaw in admin.js to prevent SQL injection
- Use PostgreSQL ANY() operator for safe array parameter handling

Scheduler improvements:
- Add registrationDeadline support - scheduler now waits until deadline before running
- Implement 5-run limit after deadline (runs at most 5 times, at least 5 minutes apart)
- Add countScheduledRunsAfterDeadline() to track post-deadline runs
- Add environment variable validation with sensible min/max ranges
- Fix Prisma query syntax (remove invalid endDate null check for non-nullable field)

UI improvements:
- Fix colspan mismatch in MatchingRunsSection (6 → 8 columns)
- Remove duplicate "Uruchom Matching" button, keep only "Run now" with audit tracking
- Simplify MatchingConfigSection to focus on deadline configuration

Logging enhancements:
- Add detailed scheduler logs showing run progress (e.g., "Running post-deadline matching (3/5)")
- Log wait times before deadline and between runs
- Show completion status after 5 runs
This commit is contained in:
Radosław Gierwiało
2025-11-30 14:42:08 +01:00
parent 752d65035a
commit 560ff1edc1
4 changed files with 184 additions and 106 deletions

View File

@@ -108,17 +108,16 @@ router.get('/events/:slug/matching-runs', authenticate, async (req, res, next) =
// Cheap and valuable: shows actual created pairs in this run.
if (runs.length > 0) {
const runIds = runs.map(r => r.id);
// Single SQL query for all listed runs
const placeholders = runIds.join(',');
const aggRows = await prisma.$queryRawUnsafe(
`SELECT origin_run_id AS "originRunId",
COUNT(*)::int AS "totalSuggestions",
COUNT(*) FILTER (WHERE recorder_id IS NOT NULL)::int AS "assignedCount",
COUNT(*) FILTER (WHERE status = 'not_found')::int AS "notFoundCount"
FROM recording_suggestions
WHERE event_id = ${event.id} AND origin_run_id IN (${placeholders})
GROUP BY origin_run_id`
);
// Single SQL query for all listed runs (using parameterized query to prevent SQL injection)
const aggRows = await prisma.$queryRaw`
SELECT origin_run_id AS "originRunId",
COUNT(*)::int AS "totalSuggestions",
COUNT(*) FILTER (WHERE recorder_id IS NOT NULL)::int AS "assignedCount",
COUNT(*) FILTER (WHERE status = 'not_found')::int AS "notFoundCount"
FROM recording_suggestions
WHERE event_id = ${event.id} AND origin_run_id = ANY(${runIds})
GROUP BY origin_run_id
`;
const aggByRun = new Map(aggRows.map(r => [r.originRunId, r]));
for (const r of runs) {
const agg = aggByRun.get(r.id) || { totalSuggestions: 0, assignedCount: 0, notFoundCount: 0 };

View File

@@ -6,8 +6,20 @@ const { SUGGESTION_STATUS } = require('../constants');
// Designed for single-backend deployments. When scaling to multiple replicas,
// add a DB-based lock (e.g., pg advisory lock) to ensure single run per event.
const DEFAULT_INTERVAL_SEC = parseInt(process.env.SCHEDULER_INTERVAL_SEC || '300', 10); // 5 min
const MIN_INTERVAL_SEC = parseInt(process.env.MATCHING_MIN_INTERVAL_SEC || '60', 10); // 1 min guard
// Environment variable validation and parsing
/**
 * Parse an integer setting (typically from an environment variable) and
 * validate it against an inclusive range.
 *
 * The value must be a plain base-10 integer literal (optional sign, digits
 * only, surrounding whitespace allowed). Anything else - including strings
 * with trailing characters such as "45abc" or "45.9", which a bare
 * `parseInt` would silently truncate - is treated as invalid.
 *
 * @param {string|undefined} envVar - Raw value; a falsy value (unset or empty) selects the default.
 * @param {number} defaultValue - Value returned when `envVar` is unset or invalid.
 * @param {number} [minValue=1] - Smallest accepted value (inclusive).
 * @param {number} [maxValue=86400] - Largest accepted value (inclusive).
 * @returns {number} The parsed integer, or `defaultValue` (after a console warning) when
 *   the value is not an integer literal or falls outside the range.
 */
function parsePositiveInt(envVar, defaultValue, minValue = 1, maxValue = 86400) {
  const raw = String(envVar || defaultValue).trim();

  // Reject partially-numeric input up front; parseInt alone stops at the
  // first non-digit and would accept "45abc" as 45.
  const parsed = /^[+-]?\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;

  if (Number.isNaN(parsed) || parsed < minValue || parsed > maxValue) {
    console.warn(
      `[scheduler] Invalid value for env var (parsed: ${parsed}). Using default: ${defaultValue}s`
    );
    return defaultValue;
  }
  return parsed;
}
// Scheduler timing settings, read once at module load. Out-of-range or
// unparsable environment values fall back to the defaults given here.
// NOTE(review): DEFAULT_INTERVAL_SEC is presumably the period between scheduler
// ticks - its use is not visible in this section; confirm.
const DEFAULT_INTERVAL_SEC = parsePositiveInt(process.env.SCHEDULER_INTERVAL_SEC, 300, 30, 3600); // 5 min (range: 30s - 1h)
// Minimum number of seconds since an event's last matching run before the
// scheduler runs it again (applied to events without a registration deadline).
const MIN_INTERVAL_SEC = parsePositiveInt(process.env.MATCHING_MIN_INTERVAL_SEC, 60, 10, 1800); // 1 min (range: 10s - 30min)
// NOTE(review): presumably holds the handle of the active scheduler timer, null
// while stopped - assigned outside this section; confirm.
let timer = null;
// Event ids are added when a run for that event starts.
let runningEvents = new Set(); // In-memory guard to avoid overlapping runs per event
@@ -20,15 +32,13 @@ async function listCandidateEvents() {
return prisma.event.findMany({
where: {
// Include events that end today or in the future
OR: [
{ endDate: { gte: now } },
{ endDate: { equals: null } },
],
endDate: { gte: now },
},
select: {
id: true,
slug: true,
matchingRunAt: true,
registrationDeadline: true,
startDate: true,
endDate: true,
},
@@ -36,13 +46,66 @@ async function listCandidateEvents() {
});
}
function shouldRunForEvent(event) {
/**
 * Number of scheduler-triggered matching runs for an event that started at
 * or after its registration deadline.
 *
 * @param {Object} event - Event carrying `id` and `registrationDeadline`
 * @returns {Promise<number>} - Count of matching runs with trigger 'scheduler'
 *   started on/after the deadline; 0 when the event has no deadline
 */
async function countScheduledRunsAfterDeadline(event) {
  const deadlineValue = event.registrationDeadline;

  // Without a deadline there is no "post-deadline" window to count in.
  if (!deadlineValue) return 0;

  const where = {
    eventId: event.id,
    trigger: 'scheduler',
    startedAt: { gte: new Date(deadlineValue) },
  };
  return prisma.matchingRun.count({ where });
}
/**
 * Decide whether the scheduler should start a matching run for an event now.
 *
 * Events with a registration deadline:
 *   - never run before the deadline;
 *   - after the deadline, run only while fewer than 5 scheduler runs have been
 *     recorded since the deadline and at least 300 seconds have elapsed since
 *     `matchingRunAt` (if set).
 * Events without a deadline:
 *   - run when they have never run, or when at least MIN_INTERVAL_SEC seconds
 *     have elapsed since `matchingRunAt`.
 *
 * @param {Object|null|undefined} event - Event with `id`, `matchingRunAt`, `registrationDeadline`
 * @returns {Promise<boolean>} true when a run should be started
 */
async function shouldRunForEvent(event) {
  if (!event) return false;

  const POST_DEADLINE_RUNS_LIMIT = 5; // Run matching 5 times after deadline
  const POST_DEADLINE_INTERVAL_SEC = 300; // 5 minutes between runs after deadline

  // Single timestamp for every comparison below.
  const now = Date.now();

  if (event.registrationDeadline) {
    const deadline = new Date(event.registrationDeadline).getTime();
    if (now < deadline) {
      return false; // Too early - deadline not reached yet
    }

    // Cheap in-memory interval check first, so a rate-limited event does not
    // cost a database round-trip; both conditions must hold either way.
    if (event.matchingRunAt) {
      const last = new Date(event.matchingRunAt).getTime();
      const secondsSinceLastRun = (now - last) / 1000;
      if (secondsSinceLastRun < POST_DEADLINE_INTERVAL_SEC) {
        return false; // Too soon, wait 5 minutes between runs
      }
    }

    const runsAfterDeadline = await countScheduledRunsAfterDeadline(event);
    // Under the limit -> OK to run; otherwise all post-deadline runs are done.
    return runsAfterDeadline < POST_DEADLINE_RUNS_LIMIT;
  }

  // No deadline set - rate limit per event by last run timestamp
  if (!event.matchingRunAt) return true;
  const last = new Date(event.matchingRunAt).getTime();
  return (now - last) / 1000 >= MIN_INTERVAL_SEC;
}
@@ -53,6 +116,21 @@ async function runForEvent(event) {
runningEvents.add(event.id);
const startedAt = new Date();
// Log matching trigger reason
if (event.registrationDeadline) {
const deadlinePassed = new Date(event.registrationDeadline) <= startedAt;
if (deadlinePassed) {
// Count current run number (will be +1 after this run completes)
const currentRuns = await countScheduledRunsAfterDeadline(event);
console.log(
`[scheduler] ${event.slug}: Running post-deadline matching (${currentRuns + 1}/5)...`
);
}
} else {
console.log(`[scheduler] ${event.slug}: Running scheduled matching (no deadline set)...`);
}
let runRow = null;
try {
// Create run audit row
@@ -110,12 +188,62 @@ async function runForEvent(event) {
async function tick() {
try {
const events = await listCandidateEvents();
if (events.length === 0) {
return; // No events to process
}
for (const event of events) {
if (shouldRunForEvent(event)) {
const shouldRun = await shouldRunForEvent(event);
if (shouldRun) {
// Fire and forget to allow parallel per-event processing in one process
// but still guarded per event by runningEvents set
// eslint-disable-next-line no-void
void runForEvent(event);
} else {
// Log why event was skipped
const now = Date.now();
if (event.registrationDeadline) {
const deadline = new Date(event.registrationDeadline).getTime();
if (now < deadline) {
// Before deadline
const minutesUntil = Math.round((deadline - now) / 60000);
console.log(
`[scheduler] ${event.slug}: Waiting for deadline (in ${minutesUntil} min)`
);
} else {
// After deadline - check why it's not running
const runsAfterDeadline = await countScheduledRunsAfterDeadline(event);
if (runsAfterDeadline >= 5) {
console.log(
`[scheduler] ${event.slug}: Completed all 5 post-deadline runs (${runsAfterDeadline}/5)`
);
} else if (event.matchingRunAt) {
const last = new Date(event.matchingRunAt).getTime();
const secondsSince = Math.round((now - last) / 1000);
const secondsUntil = 300 - secondsSince; // 5 minutes = 300 seconds
if (secondsUntil > 0) {
console.log(
`[scheduler] ${event.slug}: Post-deadline run ${runsAfterDeadline}/5 - next in ${Math.round(secondsUntil / 60)} min`
);
}
}
}
} else if (event.matchingRunAt) {
// No deadline set - show rate limiting info
const last = new Date(event.matchingRunAt).getTime();
const secondsSince = Math.round((now - last) / 1000);
if (secondsSince < MIN_INTERVAL_SEC) {
const secondsUntil = MIN_INTERVAL_SEC - secondsSince;
console.log(
`[scheduler] ${event.slug}: Rate limited (retry in ${secondsUntil}s)`
);
}
}
}
}
} catch (err) {