From b9d6f42ff54b244e189f0d524d3aa99c6bd349a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Gierwia=C5=82o?= Date: Sat, 15 Nov 2025 21:56:05 +0100 Subject: [PATCH] feat(import): add WSDC list parser with location + update-missing-location option\n\n- Parse Event List for name/date/location/country/sourceUrl\n- Support --source list|calendar|auto and --update-missing-location\n- Keep calendar import for title/start/end/url fallback\n- Update CLI summary and docs (ADMIN_CLI.md, README.md) --- README.md | 5 +- backend/src/cli/index.js | 10 +- backend/src/services/import/worldsdc.js | 139 +++++++++++++++++++++--- docs/ADMIN_CLI.md | 42 ++++++- 4 files changed, 175 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 5987b29..baec2f9 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,9 @@ Use an in-container admin console for quick maintenance. - Create user: `docker compose exec backend npm run cli -- users:create --email admin@example.com --username admin --password 'Secret123!'` - Verify email: `docker compose exec backend npm run cli -- users:verify --email admin@example.com` - List events: `docker compose exec backend npm run cli -- events:list --limit 10` - - Import WSDC calendar (dry-run): `docker compose exec backend npm run cli -- events:import:wsdc --dry-run --since 2024-01-01 --until 2024-12-31` +- Import WSDC (calendar/list) dry-run: `docker compose exec backend npm run cli -- events:import:wsdc --dry-run --since 2024-01-01 --until 2024-12-31` +- Import with location from list: `docker compose exec backend npm run cli -- events:import:wsdc --source list --limit 50` +- Enrich missing location: `docker compose exec backend npm run cli -- events:import:wsdc --source list --update-missing-location` - Event details by slug: `docker compose exec backend npm run cli -- events:details --slug warsaw-dance-2025 [--participants 25]` - Event participants: `docker compose exec backend npm run cli -- events:participants --slug 
warsaw-dance-2025 --limit 100` - Event participants CSV: `docker compose exec backend npm run cli -- events:participants --slug warsaw-dance-2025 --limit 200 --csv > participants.csv` @@ -388,6 +390,7 @@ Production equivalents use `backend-prod` instead of `backend`. REPL specifics: - Inside REPL use `run('users:list --limit 20')` or `.cli users:list --limit 20`. - Top-level await works for Prisma: `await prisma.user.findMany({ take: 5 })`. +- CLI errors in REPL do not exit the session; the error is printed so you can correct and retry. ## šŸ” Security diff --git a/backend/src/cli/index.js b/backend/src/cli/index.js index 8bcc000..7b66da0 100644 --- a/backend/src/cli/index.js +++ b/backend/src/cli/index.js @@ -318,19 +318,27 @@ async function cmdEventsImportWsdc(opts) { const until = opts.until ? new Date(opts.until) : null; const limit = opts.limit ? parseInt(String(opts.limit), 10) : undefined; const dryRun = Boolean(opts['dry-run'] || opts.dry || opts.dry_run); + const source = (opts.source || 'auto').toLowerCase(); // auto|calendar|list + const updateMissingLocation = Boolean(opts['update-missing-location'] || opts.updateMissingLocation || opts.update_missing_location); - const result = await importWorldsdc({ since, until, limit, dryRun }); + const result = await importWorldsdc({ since, until, limit, dryRun, source, updateMissingLocation }); console.log('Import summary:', { fetched: result.fetched, considered: result.considered, created: result.created.length, + updated: result.updated.length, skipped: result.skipped.length, dryRun, + source, }); if (result.created.length) { console.log('To create / created:'); console.table(result.created.map(e => ({ name: e.name, startDate: e.startDate, endDate: e.endDate, location: e.location || null, sourceUrl: e.sourceUrl }))); } + if (result.updated.length) { + console.log('To update / updated (location):'); + console.table(result.updated.map(e => ({ name: e.name, startDate: e.startDate, oldLocation: e.oldLocation || 
null, newLocation: e.newLocation }))); + } if (result.skipped.length) { console.log('Skipped:'); console.table(result.skipped.map(e => ({ name: e.name, startDate: e.startDate, reason: e.reason }))); diff --git a/backend/src/services/import/worldsdc.js b/backend/src/services/import/worldsdc.js index 18072db..357f988 100644 --- a/backend/src/services/import/worldsdc.js +++ b/backend/src/services/import/worldsdc.js @@ -50,17 +50,121 @@ function parseEventsFromCalendar(html) { })).filter((e) => e.name && e.startDate); } -async function importWorldsdc({ since = null, until = null, limit, dryRun = false } = {}) { +function parseMonth(m) { + const map = { + jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5, + jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11, + }; + const key = (m || '').toLowerCase().slice(0, 3); + return key in map ? map[key] : null; +} + +function parseDateRange(raw) { + // Examples: + // "Nov 13 - 16, 2025" + // "Dec 29, 2025 - Jan 2, 2026" + // "Jan 5-8, 2026" + const s = String(raw || '').replace(/\s+/g, ' ').trim(); + if (!s) return { start: null, end: null }; + const parts = s.split('-'); + if (parts.length < 2) { + const m = /([A-Za-z]{3,})\s+(\d{1,2}),?\s*(\d{4})?/.exec(s); + if (!m) return { start: null, end: null }; + const month = parseMonth(m[1]); + const day = parseInt(m[2], 10); + const year = m[3] ? parseInt(m[3], 10) : new Date().getFullYear(); + const d = new Date(Date.UTC(year, month ?? 0, day)); + return { start: d, end: d }; + } + const left = parts[0].trim(); + const right = parts.slice(1).join('-').trim(); + + // Left side + let lm = /([A-Za-z]{3,})\s+(\d{1,2})(?:,\s*(\d{4}))?/.exec(left); + if (!lm) lm = /([A-Za-z]{3,})\s*(\d{1,2})/.exec(left); + const lMonth = lm ? parseMonth(lm[1]) : null; + const lDay = lm ? parseInt(lm[2], 10) : null; + const lYear = lm && lm[3] ? parseInt(lm[3], 10) : null; + + // Right side + let rm = /([A-Za-z]{3,})?\s*(\d{1,2})(?:,\s*(\d{4}))?/.exec(right); + const rMonth = rm && rm[1] ? 
parseMonth(rm[1]) : lMonth; + const rDay = rm ? parseInt(rm[2], 10) : lDay; + let rYear = rm && rm[3] ? parseInt(rm[3], 10) : lYear; + let year = lYear ?? rYear ?? new Date().getFullYear(); + if (!rYear) rYear = year; + + const start = (lMonth != null && lDay != null) ? new Date(Date.UTC(year, lMonth, lDay)) : null; + const end = (rMonth != null && rDay != null) ? new Date(Date.UTC(rYear, rMonth, rDay)) : start; + return { start, end }; +} + +function textBetween(html, startIdx, endIdx) { + return html.slice(startIdx, endIdx); +} + +function stripTags(s) { + return String(s || '') + .replace(/<[^>]*>/g, ' ') + .replace(/ /g, ' ') + .replace(/\s+/g, ' ') + .trim(); +} + +function parseEventsFromList(html) { + // Find the first inside the elementor shortcode block + const markerIdx = html.indexOf('
'); + const from = markerIdx !== -1 ? markerIdx : 0; + const tblStart = html.indexOf('', tblStart); + if (tblEnd === -1) return []; + const tableHtml = textBetween(html, tblStart, tblEnd + 8); + + // Split rows + const rows = tableHtml.split(/]*>/i).slice(1); // skip header marker + const out = []; + for (const row of rows) { + const cells = row.split(/]*>/i).slice(1).map((c) => c.split('')[0]); + if (cells.length < 3) continue; + const dateCell = stripTags(cells[0]); + const nameCell = cells[1]; + const locCell = stripTags(cells[2]); + const countryCell = stripTags(cells[3] || ''); + // Extract link and name + const linkMatch = /]+href=\"([^\"]+)\"[^>]*>(.*?)<\/a>/i.exec(nameCell) || /]+href='([^']+)'[^>]*>(.*?)<\/a>/i.exec(nameCell); + const sourceUrl = linkMatch ? linkMatch[1] : null; + const name = linkMatch ? stripTags(linkMatch[2]) : stripTags(nameCell); + + const { start, end } = parseDateRange(dateCell); + if (!name || !start) continue; + out.push({ name, startDate: start, endDate: end || start, location: locCell || null, country: countryCell || null, sourceUrl }); + } + return out; +} + +async function importWorldsdc({ since = null, until = null, limit, dryRun = false, source = 'auto', updateMissingLocation = false } = {}) { const { prisma } = require('../../utils/db'); - const url = 'https://www.worldsdc.com/events/calendar/'; - const html = await fetchUrl(url); - const events = parseEventsFromCalendar(html); + let events = []; + if (source === 'calendar' || source === 'auto') { + try { + const calHtml = await fetchUrl('https://www.worldsdc.com/events/calendar/'); + events = parseEventsFromCalendar(calHtml); + } catch (_) { /* ignore */ } + } + if ((source === 'list' || source === 'auto') && events.length === 0) { + try { + const listHtml = await fetchUrl('https://www.worldsdc.com/events/'); + events = parseEventsFromList(listHtml); + } catch (_) { /* ignore */ } + } const normalizeDate = (s) => new Date(s.split('T')[0] || s); let list = events.map((e) => 
({ name: e.name.trim(), - startDate: normalizeDate(e.startDate), - endDate: e.endDate ? normalizeDate(e.endDate) : normalizeDate(e.startDate), - location: null, // not provided by calendar source + startDate: e.startDate instanceof Date ? e.startDate : normalizeDate(e.startDate), + endDate: e.endDate ? (e.endDate instanceof Date ? e.endDate : normalizeDate(e.endDate)) : (e.startDate instanceof Date ? e.startDate : normalizeDate(e.startDate)), + location: e.location || 'Unknown', + country: e.country || null, sourceUrl: e.sourceUrl || null, })); const fetched = list.length; @@ -69,14 +173,24 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals if (limit) list = list.slice(0, limit); const created = []; + const updated = []; const skipped = []; for (const e of list) { const existing = await prisma.event.findFirst({ where: { name: e.name, startDate: e.startDate }, - select: { id: true }, + select: { id: true, location: true }, }); if (existing) { - skipped.push({ ...e, reason: 'exists' }); + if (updateMissingLocation && e.location && (!existing.location || existing.location === 'Unknown')) { + if (dryRun) { + updated.push({ ...e, id: existing.id, oldLocation: existing.location || null, newLocation: e.location }); + } else { + await prisma.event.update({ where: { id: existing.id }, data: { location: e.location } }); + updated.push({ ...e, id: existing.id, oldLocation: existing.location || null, newLocation: e.location }); + } + } else { + skipped.push({ ...e, reason: 'exists' }); + } continue; } if (dryRun) { @@ -86,7 +200,7 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals const saved = await prisma.event.create({ data: { name: e.name, - location: 'Unknown', + location: e.location || 'Unknown', startDate: e.startDate, endDate: e.endDate, description: null, @@ -96,8 +210,7 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals }); created.push({ ...e, id: saved.id, slug: 
saved.slug }); } - return { fetched, considered: list.length, created, skipped }; + return { fetched, considered: list.length, created, updated, skipped }; } -module.exports = { parseEventsFromCalendar, importWorldsdc }; - +module.exports = { parseEventsFromCalendar, parseEventsFromList, importWorldsdc }; diff --git a/docs/ADMIN_CLI.md b/docs/ADMIN_CLI.md index 69ddbf2..8f945de 100644 --- a/docs/ADMIN_CLI.md +++ b/docs/ADMIN_CLI.md @@ -1,6 +1,6 @@ # Admin CLI & REPL — spotlight.cam -Administrative console for maintenance tasks inside the backend container. Provides both one-shot commands and an interactive REPL with app context. +Administrative console for maintenance tasks inside the backend container. Provides both one‑shot commands and an interactive REPL with app context. --- @@ -23,17 +23,32 @@ With Makefile shortcuts: - Default entry: running `npm run cli` starts a Node.js REPL with: - `prisma` client and `bcrypt` in context - Aliases: `u = prisma.user`, `e = prisma.event`, `m = prisma.match`, `ep = prisma.eventParticipant`, `r = prisma.rating` - - Top-level await: `await u.findMany({ take: 5 })` + - Top‑level await: `await u.findMany({ take: 5 })` - Autocomplete (TAB) from Node REPL - - Persistent history in `.repl_history` (best-effort) + - Persistent history in `.repl_history` (best‑effort) - Run CLI subcommands from inside REPL: - `.cli users:list --limit 20` - `run('events:checkin --username john_doe --slug warsaw-dance-2025')` +- Error handling: CLI errors (e.g., missing flags) do not exit the REPL; the error is printed and you can correct the command and retry. --- ## Commands +### Cheatsheet +- `repl` +- `users:list [--limit ]` +- `users:create --email --username --password

[--first ] [--last ]` +- `users:verify --email ` +- `events:list [--limit ]` +- `events:details --slug ` +- `events:participants --slug [--limit ] [--csv]` +- `events:import:wsdc [--dry-run] [--since YYYY-MM-DD] [--until YYYY-MM-DD] [--limit ] [--source list|calendar|auto] [--update-missing-location]` +- `matches:list [--limit ] [--status pending|accepted|completed]` +- `events:checkin --username --slug ` +- `logs:app [--lines ]` +- `logs:messages [--limit ]` + ### users:list - Description: List users - Options: @@ -106,16 +121,24 @@ With Makefile shortcuts: - `npm run cli -- events:participants --slug warsaw-dance-2025 --limit 200 --csv > participants.csv` ### events:import:wsdc -- Description: Import events from worldsdc.com calendar page. -- Notes: Uses the calendar’s embedded data (title/start/end/url). Does not set `participants` or `worldsdcId`. +- Description: Import events from worldsdc.com. +- Sources: + - `--source list`: parse Event List page (includes Event Location + Country) + - `--source calendar`: parse Events Calendar page (title/start/end/url) + - `--source auto` (default): try calendar first, then fall back to the list if the calendar fetch fails or its parsing yields no events - Options: - `--dry-run`: show what would be created without writing to DB - `--since YYYY-MM-DD`: only events on/after date - `--until YYYY-MM-DD`: only events on/before date - `--limit `: limit considered items after filtering + - `--update-missing-location`: when an event already exists and its `location` is empty/"Unknown", update it from the imported source (list recommended) - Examples: - `npm run cli -- events:import:wsdc --dry-run --since 2024-01-01 --until 2024-12-31` - - `npm run cli -- events:import:wsdc --limit 50` + - `npm run cli -- events:import:wsdc --source list --limit 50` + - `npm run cli -- events:import:wsdc --source list --update-missing-location` +- Field mapping: `name`, `startDate`, `endDate`, `sourceUrl`, `location` (from list) or `Unknown` (from calendar). No updates to `participantsCount`. 
+- Dedup strategy: skip when `(name, startDate)` already exists; with `--update-missing-location`, an existing event whose `location` is empty/"Unknown" gets only its `location` updated instead of being skipped. +- Alias: `events:import:worldsdc` (kept for backwards compatibility). ### logs:app - Description: Tail application log file (if configured) @@ -147,3 +170,10 @@ With Makefile shortcuts: - CLI has full DB access; restrict usage to SSH + container exec - Do not expose CLI via HTTP endpoints - Consider audit logs for sensitive actions if needed + +--- + +## Domain Notes + +- In spotlight.cam, `participantsCount` reflects local check‑ins (EventParticipants), not official WSDC participant metrics. +- The WSDC calendar source does not provide reliable location or participant counts; the importer intentionally does not set `worldsdcId` or `participantsCount`.