feat(import): add WSDC list parser with location + update-missing-location option\n\n- Parse Event List for name/date/location/country/sourceUrl\n- Support --source list|calendar|auto and --update-missing-location\n- Keep calendar import for title/start/end/url fallback\n- Update CLI summary and docs (ADMIN_CLI.md, README.md)

This commit is contained in:
Radosław Gierwiało
2025-11-15 21:56:05 +01:00
parent 457de6c1c4
commit b9d6f42ff5
4 changed files with 175 additions and 21 deletions

View File

@@ -374,7 +374,9 @@ Use an in-container admin console for quick maintenance.
- Create user: `docker compose exec backend npm run cli -- users:create --email admin@example.com --username admin --password 'Secret123!'` - Create user: `docker compose exec backend npm run cli -- users:create --email admin@example.com --username admin --password 'Secret123!'`
- Verify email: `docker compose exec backend npm run cli -- users:verify --email admin@example.com` - Verify email: `docker compose exec backend npm run cli -- users:verify --email admin@example.com`
- List events: `docker compose exec backend npm run cli -- events:list --limit 10` - List events: `docker compose exec backend npm run cli -- events:list --limit 10`
- Import WSDC calendar (dry-run): `docker compose exec backend npm run cli -- events:import:wsdc --dry-run --since 2024-01-01 --until 2024-12-31` - Import WSDC (calendar/list) dry-run: `docker compose exec backend npm run cli -- events:import:wsdc --dry-run --since 2024-01-01 --until 2024-12-31`
- Import with location from list: `docker compose exec backend npm run cli -- events:import:wsdc --source list --limit 50`
- Enrich missing location: `docker compose exec backend npm run cli -- events:import:wsdc --source list --update-missing-location`
- Event details by slug: `docker compose exec backend npm run cli -- events:details --slug warsaw-dance-2025 [--participants 25]` - Event details by slug: `docker compose exec backend npm run cli -- events:details --slug warsaw-dance-2025 [--participants 25]`
- Event participants: `docker compose exec backend npm run cli -- events:participants --slug warsaw-dance-2025 --limit 100` - Event participants: `docker compose exec backend npm run cli -- events:participants --slug warsaw-dance-2025 --limit 100`
- Event participants CSV: `docker compose exec backend npm run cli -- events:participants --slug warsaw-dance-2025 --limit 200 --csv > participants.csv` - Event participants CSV: `docker compose exec backend npm run cli -- events:participants --slug warsaw-dance-2025 --limit 200 --csv > participants.csv`
@@ -388,6 +390,7 @@ Production equivalents use `backend-prod` instead of `backend`.
REPL specifics: REPL specifics:
- Inside REPL use `run('users:list --limit 20')` or `.cli users:list --limit 20`. - Inside REPL use `run('users:list --limit 20')` or `.cli users:list --limit 20`.
- Top-level await works for Prisma: `await prisma.user.findMany({ take: 5 })`. - Top-level await works for Prisma: `await prisma.user.findMany({ take: 5 })`.
- CLI errors in REPL do not exit the session; the error is printed so you can correct and retry.
## 🔐 Security ## 🔐 Security

View File

@@ -318,19 +318,27 @@ async function cmdEventsImportWsdc(opts) {
const until = opts.until ? new Date(opts.until) : null; const until = opts.until ? new Date(opts.until) : null;
const limit = opts.limit ? parseInt(String(opts.limit), 10) : undefined; const limit = opts.limit ? parseInt(String(opts.limit), 10) : undefined;
const dryRun = Boolean(opts['dry-run'] || opts.dry || opts.dry_run); const dryRun = Boolean(opts['dry-run'] || opts.dry || opts.dry_run);
const source = (opts.source || 'auto').toLowerCase(); // auto|calendar|list
const updateMissingLocation = Boolean(opts['update-missing-location'] || opts.updateMissingLocation || opts.update_missing_location);
const result = await importWorldsdc({ since, until, limit, dryRun }); const result = await importWorldsdc({ since, until, limit, dryRun, source, updateMissingLocation });
console.log('Import summary:', { console.log('Import summary:', {
fetched: result.fetched, fetched: result.fetched,
considered: result.considered, considered: result.considered,
created: result.created.length, created: result.created.length,
updated: result.updated.length,
skipped: result.skipped.length, skipped: result.skipped.length,
dryRun, dryRun,
source,
}); });
if (result.created.length) { if (result.created.length) {
console.log('To create / created:'); console.log('To create / created:');
console.table(result.created.map(e => ({ name: e.name, startDate: e.startDate, endDate: e.endDate, location: e.location || null, sourceUrl: e.sourceUrl }))); console.table(result.created.map(e => ({ name: e.name, startDate: e.startDate, endDate: e.endDate, location: e.location || null, sourceUrl: e.sourceUrl })));
} }
if (result.updated.length) {
console.log('To update / updated (location):');
console.table(result.updated.map(e => ({ name: e.name, startDate: e.startDate, oldLocation: e.oldLocation || null, newLocation: e.newLocation })));
}
if (result.skipped.length) { if (result.skipped.length) {
console.log('Skipped:'); console.log('Skipped:');
console.table(result.skipped.map(e => ({ name: e.name, startDate: e.startDate, reason: e.reason }))); console.table(result.skipped.map(e => ({ name: e.name, startDate: e.startDate, reason: e.reason })));

View File

@@ -50,17 +50,121 @@ function parseEventsFromCalendar(html) {
})).filter((e) => e.name && e.startDate); })).filter((e) => e.name && e.startDate);
} }
async function importWorldsdc({ since = null, until = null, limit, dryRun = false } = {}) { function parseMonth(m) {
// parseMonth: convert an English month name or abbreviation into a zero-based
// month index (0 = January ... 11 = December), the form Date.UTC() expects.
// Only the first three letters are inspected, case-insensitively, so "Nov",
// "nov" and "November" all map to 10.
// Returns null when the input is empty or its first three letters are not a
// known month prefix.
const map = {
jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
};
// Tolerate null/undefined input by treating it as an empty string.
const key = (m || '').toLowerCase().slice(0, 3);
return key in map ? map[key] : null;
}
/**
 * Parse a human-readable date or date range into UTC-midnight Date objects.
 *
 * Supported shapes (hyphen, en dash, em dash and their HTML entities are all
 * accepted as the range separator):
 *   "Nov 13 - 16, 2025"
 *   "Dec 29, 2025 - Jan 2, 2026"
 *   "Dec 29 - Jan 2, 2026"   (start is inferred to be in 2025)
 *   "Jan 5-8, 2026"
 *   "Mar 7, 2026"            (single day: start === end)
 *
 * When no year is present at all, the current year is assumed.
 *
 * @param {string} raw - Date text, typically the stripped content of a table cell.
 * @returns {{ start: Date|null, end: Date|null }} Both null when nothing can be
 *   parsed; `end` falls back to `start` when only the left side is parseable.
 */
function parseDateRange(raw) {
  const s = String(raw || '')
    // Normalise dash entities and Unicode dashes so they split like a hyphen.
    .replace(/&(?:ndash|mdash|#821[12]);/gi, '-')
    .replace(/[\u2010-\u2015\u2212]/g, '-')
    .replace(/\s+/g, ' ')
    .trim();
  if (!s) return { start: null, end: null };

  const dashIdx = s.indexOf('-');
  if (dashIdx === -1) {
    // Single date. (?!\d) stops the day from swallowing the start of a year.
    const m = /([A-Za-z]{3,})\s+(\d{1,2})(?!\d),?\s*(\d{4})?/.exec(s);
    const month = m ? parseMonth(m[1]) : null;
    // An unrecognised month is a parse failure, not "January".
    if (month == null) return { start: null, end: null };
    const singleYear = m[3] ? parseInt(m[3], 10) : new Date().getFullYear();
    const d = new Date(Date.UTC(singleYear, month, parseInt(m[2], 10)));
    return { start: d, end: d };
  }

  const left = s.slice(0, dashIdx).trim();
  const right = s.slice(dashIdx + 1).trim();

  // Left side: month is mandatory, year optional.
  const lm = /([A-Za-z]{3,})\s*(\d{1,2})(?!\d)(?:,?\s*(\d{4}))?/.exec(left);
  const lMonth = lm ? parseMonth(lm[1]) : null;
  const lDay = lm ? parseInt(lm[2], 10) : null;
  const lYear = lm && lm[3] ? parseInt(lm[3], 10) : null;

  // Right side: month is optional and inherits from the left side.
  const rm = /(?:([A-Za-z]{3,})\s*)?(?<!\d)(\d{1,2})(?!\d)(?:,?\s*(\d{4}))?/.exec(right);
  const rMonth = rm && rm[1] ? parseMonth(rm[1]) : lMonth;
  const rDay = rm ? parseInt(rm[2], 10) : lDay;
  const rYear = rm && rm[3] ? parseInt(rm[3], 10) : null;

  const fallbackYear = new Date().getFullYear();
  let startYear = lYear ?? rYear ?? fallbackYear;
  let endYear = rYear ?? lYear ?? fallbackYear;

  const hasStart = lMonth != null && lDay != null;
  const hasEnd = rMonth != null && rDay != null;

  // A range such as "Dec 29 - Jan 2, 2026" crosses New Year: when one side's
  // year was inferred from the other and the end month precedes the start
  // month, shift the inferred side so that start <= end.
  const yearWasInferred = lYear == null || rYear == null;
  if (hasStart && hasEnd && yearWasInferred && rMonth < lMonth) {
    if (lYear == null && rYear != null) {
      startYear -= 1;
    } else {
      endYear += 1;
    }
  }

  const start = hasStart ? new Date(Date.UTC(startYear, lMonth, lDay)) : null;
  const end = hasEnd ? new Date(Date.UTC(endYear, rMonth, rDay)) : start;
  return { start, end };
}
/**
 * Return the portion of `html` from `startIdx` (inclusive) up to `endIdx`
 * (exclusive), using the semantics of `slice`.
 *
 * @param {string} html - Text to cut from.
 * @param {number} startIdx - Index of the first character to keep.
 * @param {number} endIdx - Index just past the last character to keep.
 * @returns {string} The extracted fragment.
 */
function textBetween(html, startIdx, endIdx) {
  const fragment = html.slice(startIdx, endIdx);
  return fragment;
}
/**
 * Reduce an HTML fragment to plain text: tags are replaced by spaces, common
 * character references are decoded, and runs of whitespace are collapsed.
 *
 * Decoded references: &amp; &lt; &gt; &quot; &apos; &nbsp; &ndash; &mdash;
 * plus decimal (&#233;) and hexadecimal (&#xE9;) numeric references. Unknown
 * named references are left untouched.
 *
 * @param {*} s - HTML fragment; null/undefined/empty yields ''.
 * @returns {string} Trimmed plain text.
 */
function stripTags(s) {
  const named = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
    ['ndash', '\u2013'],
    ['mdash', '\u2014'],
  ]);

  const decodeEntity = (whole, body) => {
    if (body.startsWith('#')) {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const code = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const valid = Number.isInteger(code) && code > 0 && code <= 0x10ffff;
      return valid ? String.fromCodePoint(code) : whole;
    }
    return named.has(body) ? named.get(body) : whole;
  };

  return String(s || '')
    // Tags go first so that a decoded "&lt;b&gt;" is never mistaken for markup.
    .replace(/<[^>]*>/g, ' ')
    // Single pass: "&amp;lt;" becomes "&lt;", never "<".
    .replace(/&(#[xX][0-9a-fA-F]+|#\d+|[A-Za-z][A-Za-z0-9]*);/g, decodeEntity)
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Extract events from the WSDC "Event List" page.
 *
 * Looks for the first <table> after the `elementor-shortcode` container (or
 * the first table in the document when that container is absent) and reads
 * each row as: date | name (optionally a link) | location | country.
 * Rows with fewer than three <td> cells (e.g. <th> header rows), without a
 * name, or without a parseable start date are skipped.
 *
 * @param {string} html - Raw HTML of the list page.
 * @returns {Array<{name: string, startDate: Date, endDate: Date,
 *   location: string|null, country: string|null, sourceUrl: string|null}>}
 */
function parseEventsFromList(html) {
  const doc = String(html || '');

  // Find the first <table> inside the elementor shortcode block
  const markerIdx = doc.indexOf('<div class="elementor-shortcode">');
  const from = markerIdx !== -1 ? markerIdx : 0;
  const tblStart = doc.indexOf('<table', from);
  if (tblStart === -1) return [];
  const tblEnd = doc.indexOf('</table>', tblStart);
  if (tblEnd === -1) return [];
  const tableHtml = textBetween(doc, tblStart, tblEnd + '</table>'.length);

  // Either quote style for href; [\s\S] lets the link text span several lines.
  const linkRe = /<a\b[^>]*?\bhref\s*=\s*(["'])(.*?)\1[^>]*>([\s\S]*?)<\/a>/i;

  // Everything before the first <tr> is table preamble, hence slice(1).
  const rows = tableHtml.split(/<tr\b[^>]*>/i).slice(1);
  const out = [];
  for (const row of rows) {
    const cells = row
      .split(/<td\b[^>]*>/i)
      .slice(1)
      .map((cell) => cell.split(/<\/td\s*>/i)[0]);
    if (cells.length < 3) continue;

    const dateCell = stripTags(cells[0]);
    const nameCell = cells[1];
    const locCell = stripTags(cells[2]);
    const countryCell = stripTags(cells[3] || '');

    // Extract link and name
    const linkMatch = linkRe.exec(nameCell);
    // Attribute values are HTML-escaped; undo &amp; so the URL is usable.
    const href = linkMatch ? linkMatch[2].replace(/&amp;/g, '&').trim() : '';
    const sourceUrl = href || null;
    // Fall back to the whole cell when the anchor has no text (e.g. an image link).
    const name = (linkMatch && stripTags(linkMatch[3])) || stripTags(nameCell);

    const { start, end } = parseDateRange(dateCell);
    if (!name || !start) continue;

    out.push({
      name,
      startDate: start,
      endDate: end || start,
      location: locCell || null,
      country: countryCell || null,
      sourceUrl,
    });
  }
  return out;
}
async function importWorldsdc({ since = null, until = null, limit, dryRun = false, source = 'auto', updateMissingLocation = false } = {}) {
const { prisma } = require('../../utils/db'); const { prisma } = require('../../utils/db');
const url = 'https://www.worldsdc.com/events/calendar/'; let events = [];
const html = await fetchUrl(url); if (source === 'calendar' || source === 'auto') {
const events = parseEventsFromCalendar(html); try {
const calHtml = await fetchUrl('https://www.worldsdc.com/events/calendar/');
events = parseEventsFromCalendar(calHtml);
} catch (_) { /* ignore */ }
}
if ((source === 'list' || source === 'auto') && events.length === 0) {
try {
const listHtml = await fetchUrl('https://www.worldsdc.com/events/');
events = parseEventsFromList(listHtml);
} catch (_) { /* ignore */ }
}
const normalizeDate = (s) => new Date(s.split('T')[0] || s); const normalizeDate = (s) => new Date(s.split('T')[0] || s);
let list = events.map((e) => ({ let list = events.map((e) => ({
name: e.name.trim(), name: e.name.trim(),
startDate: normalizeDate(e.startDate), startDate: e.startDate instanceof Date ? e.startDate : normalizeDate(e.startDate),
endDate: e.endDate ? normalizeDate(e.endDate) : normalizeDate(e.startDate), endDate: e.endDate ? (e.endDate instanceof Date ? e.endDate : normalizeDate(e.endDate)) : (e.startDate instanceof Date ? e.startDate : normalizeDate(e.startDate)),
location: null, // not provided by calendar source location: e.location || 'Unknown',
country: e.country || null,
sourceUrl: e.sourceUrl || null, sourceUrl: e.sourceUrl || null,
})); }));
const fetched = list.length; const fetched = list.length;
@@ -69,14 +173,24 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals
if (limit) list = list.slice(0, limit); if (limit) list = list.slice(0, limit);
const created = []; const created = [];
const updated = [];
const skipped = []; const skipped = [];
for (const e of list) { for (const e of list) {
const existing = await prisma.event.findFirst({ const existing = await prisma.event.findFirst({
where: { name: e.name, startDate: e.startDate }, where: { name: e.name, startDate: e.startDate },
select: { id: true }, select: { id: true, location: true },
}); });
if (existing) { if (existing) {
if (updateMissingLocation && e.location && (!existing.location || existing.location === 'Unknown')) {
if (dryRun) {
updated.push({ ...e, id: existing.id, oldLocation: existing.location || null, newLocation: e.location });
} else {
await prisma.event.update({ where: { id: existing.id }, data: { location: e.location } });
updated.push({ ...e, id: existing.id, oldLocation: existing.location || null, newLocation: e.location });
}
} else {
skipped.push({ ...e, reason: 'exists' }); skipped.push({ ...e, reason: 'exists' });
}
continue; continue;
} }
if (dryRun) { if (dryRun) {
@@ -86,7 +200,7 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals
const saved = await prisma.event.create({ const saved = await prisma.event.create({
data: { data: {
name: e.name, name: e.name,
location: 'Unknown', location: e.location || 'Unknown',
startDate: e.startDate, startDate: e.startDate,
endDate: e.endDate, endDate: e.endDate,
description: null, description: null,
@@ -96,8 +210,7 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals
}); });
created.push({ ...e, id: saved.id, slug: saved.slug }); created.push({ ...e, id: saved.id, slug: saved.slug });
} }
return { fetched, considered: list.length, created, skipped }; return { fetched, considered: list.length, created, updated, skipped };
} }
module.exports = { parseEventsFromCalendar, importWorldsdc }; module.exports = { parseEventsFromCalendar, parseEventsFromList, importWorldsdc };

View File

@@ -1,6 +1,6 @@
# Admin CLI & REPL — spotlight.cam # Admin CLI & REPL — spotlight.cam
Administrative console for maintenance tasks inside the backend container. Provides both one-shot commands and an interactive REPL with app context. Administrative console for maintenance tasks inside the backend container. Provides both one-shot commands and an interactive REPL with app context.
--- ---
@@ -23,17 +23,32 @@ With Makefile shortcuts:
- Default entry: running `npm run cli` starts a Node.js REPL with: - Default entry: running `npm run cli` starts a Node.js REPL with:
- `prisma` client and `bcrypt` in context - `prisma` client and `bcrypt` in context
- Aliases: `u = prisma.user`, `e = prisma.event`, `m = prisma.match`, `ep = prisma.eventParticipant`, `r = prisma.rating` - Aliases: `u = prisma.user`, `e = prisma.event`, `m = prisma.match`, `ep = prisma.eventParticipant`, `r = prisma.rating`
- Top-level await: `await u.findMany({ take: 5 })` - Top-level await: `await u.findMany({ take: 5 })`
- Autocomplete (TAB) from Node REPL - Autocomplete (TAB) from Node REPL
- Persistent history in `.repl_history` (best-effort) - Persistent history in `.repl_history` (best-effort)
- Run CLI subcommands from inside REPL: - Run CLI subcommands from inside REPL:
- `.cli users:list --limit 20` - `.cli users:list --limit 20`
- `run('events:checkin --username john_doe --slug warsaw-dance-2025')` - `run('events:checkin --username john_doe --slug warsaw-dance-2025')`
- Error handling: CLI errors (e.g., missing flags) do not exit the REPL; the error is printed and you can correct the command and retry.
--- ---
## Commands ## Commands
### Cheatsheet
- `repl`
- `users:list [--limit <n>]`
- `users:create --email <e> --username <u> --password <p> [--first <f>] [--last <l>]`
- `users:verify --email <e>`
- `events:list [--limit <n>]`
- `events:details --slug <slug>`
- `events:participants --slug <slug> [--limit <n>] [--csv]`
- `events:import:wsdc [--dry-run] [--since YYYY-MM-DD] [--until YYYY-MM-DD] [--limit <n>] [--source list|calendar|auto] [--update-missing-location]`
- `matches:list [--limit <n>] [--status pending|accepted|completed]`
- `events:checkin --username <u> --slug <s>`
- `logs:app [--lines <n>]`
- `logs:messages [--limit <n>]`
### users:list ### users:list
- Description: List users - Description: List users
- Options: - Options:
@@ -106,16 +121,24 @@ With Makefile shortcuts:
- `npm run cli -- events:participants --slug warsaw-dance-2025 --limit 200 --csv > participants.csv` - `npm run cli -- events:participants --slug warsaw-dance-2025 --limit 200 --csv > participants.csv`
### events:import:wsdc ### events:import:wsdc
- Description: Import events from worldsdc.com calendar page. - Description: Import events from worldsdc.com.
- Notes: Uses the calendars embedded data (title/start/end/url). Does not set `participants` or `worldsdcId`. - Sources:
- `--source list`: parse Event List page (includes Event Location + Country)
- `--source calendar`: parse Events Calendar page (title/start/end/url)
- `--source auto` (default): try calendar first, then list if calendar parsing yields no events
- Options: - Options:
- `--dry-run`: show what would be created without writing to DB - `--dry-run`: show what would be created without writing to DB
- `--since YYYY-MM-DD`: only events on/after date - `--since YYYY-MM-DD`: only events on/after date
- `--until YYYY-MM-DD`: only events on/before date - `--until YYYY-MM-DD`: only events on/before date
- `--limit <n>`: limit considered items after filtering - `--limit <n>`: limit considered items after filtering
- `--update-missing-location`: when an event already exists and its `location` is empty/"Unknown", update it from the imported source (list recommended)
- Examples: - Examples:
- `npm run cli -- events:import:wsdc --dry-run --since 2024-01-01 --until 2024-12-31` - `npm run cli -- events:import:wsdc --dry-run --since 2024-01-01 --until 2024-12-31`
- `npm run cli -- events:import:wsdc --limit 50` - `npm run cli -- events:import:wsdc --source list --limit 50`
- `npm run cli -- events:import:wsdc --source list --update-missing-location`
- Field mapping: `name`, `startDate`, `endDate`, `sourceUrl`, `location` (from list) or `Unknown` (from calendar). No updates to `participantsCount`.
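- Dedup strategy: skip when `(name, startDate)` already exists; with `--update-missing-location`, an existing event whose `location` is empty or `Unknown` has only its `location` updated instead of being skipped.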
- Alias: `events:import:worldsdc` (kept for backwards compatibility).
### logs:app ### logs:app
- Description: Tail application log file (if configured) - Description: Tail application log file (if configured)
@@ -147,3 +170,10 @@ With Makefile shortcuts:
- CLI has full DB access; restrict usage to SSH + container exec - CLI has full DB access; restrict usage to SSH + container exec
- Do not expose CLI via HTTP endpoints - Do not expose CLI via HTTP endpoints
- Consider audit logs for sensitive actions if needed - Consider audit logs for sensitive actions if needed
---
## Domain Notes
- In spotlight.cam, `participantsCount` reflects local check-ins (EventParticipants), not official WSDC participant metrics.
- The WSDC calendar source does not provide reliable location or participant counts; importer intentionally does not set `worldsdcId` or `participantsCount`.