feat(import): add WSDC list parser with location + update-missing-location option

- Parse Event List for name/date/location/country/sourceUrl
- Support --source list|calendar|auto and --update-missing-location
- Keep calendar import for title/start/end/url fallback
- Update CLI summary and docs (ADMIN_CLI.md, README.md)
This commit is contained in:
@@ -318,19 +318,27 @@ async function cmdEventsImportWsdc(opts) {
|
||||
const until = opts.until ? new Date(opts.until) : null;
|
||||
const limit = opts.limit ? parseInt(String(opts.limit), 10) : undefined;
|
||||
const dryRun = Boolean(opts['dry-run'] || opts.dry || opts.dry_run);
|
||||
const source = (opts.source || 'auto').toLowerCase(); // auto|calendar|list
|
||||
const updateMissingLocation = Boolean(opts['update-missing-location'] || opts.updateMissingLocation || opts.update_missing_location);
|
||||
|
||||
const result = await importWorldsdc({ since, until, limit, dryRun });
|
||||
const result = await importWorldsdc({ since, until, limit, dryRun, source, updateMissingLocation });
|
||||
console.log('Import summary:', {
|
||||
fetched: result.fetched,
|
||||
considered: result.considered,
|
||||
created: result.created.length,
|
||||
updated: result.updated.length,
|
||||
skipped: result.skipped.length,
|
||||
dryRun,
|
||||
source,
|
||||
});
|
||||
if (result.created.length) {
|
||||
console.log('To create / created:');
|
||||
console.table(result.created.map(e => ({ name: e.name, startDate: e.startDate, endDate: e.endDate, location: e.location || null, sourceUrl: e.sourceUrl })));
|
||||
}
|
||||
if (result.updated.length) {
|
||||
console.log('To update / updated (location):');
|
||||
console.table(result.updated.map(e => ({ name: e.name, startDate: e.startDate, oldLocation: e.oldLocation || null, newLocation: e.newLocation })));
|
||||
}
|
||||
if (result.skipped.length) {
|
||||
console.log('Skipped:');
|
||||
console.table(result.skipped.map(e => ({ name: e.name, startDate: e.startDate, reason: e.reason })));
|
||||
|
||||
@@ -50,17 +50,121 @@ function parseEventsFromCalendar(html) {
|
||||
})).filter((e) => e.name && e.startDate);
|
||||
}
|
||||
|
||||
async function importWorldsdc({ since = null, until = null, limit, dryRun = false } = {}) {
|
||||
/**
 * Convert an English month name or abbreviation into a zero-based month index.
 *
 * Only the first three letters are significant, so "Sep", "Sept" and
 * "September" all map to 8. Matching is case-insensitive and ignores
 * surrounding whitespace.
 *
 * @param {*} m - Month text; non-string values are coerced and simply fail to match.
 * @returns {number|null} 0 (January) through 11 (December), or null when unrecognised.
 */
function parseMonth(m) {
  const map = {
    jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
    jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
  };
  // Coerce first so numbers/null/undefined return null instead of throwing on .toLowerCase().
  const key = String(m ?? '').trim().toLowerCase().slice(0, 3);
  // Own-property check: never resolve a key through Object.prototype.
  return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : null;
}
|
||||
|
||||
/**
 * Parse a human-readable date or date range into UTC-midnight Date objects.
 *
 * Supported shapes (month names in English, full or abbreviated):
 *   "Nov 13 - 16, 2025"
 *   "Dec 29, 2025 - Jan 2, 2026"
 *   "Dec 29 - Jan 2, 2026"   (start year is inferred as 2025)
 *   "Jan 5-8, 2026"
 *   "Mar 7, 2026"            (single day: start and end are the same Date)
 *
 * En/em dashes (literal or as `&ndash;` / `&#8211;` style entities) are
 * accepted as the range separator. When no year is present anywhere the
 * current year is used.
 *
 * @param {*} raw - Date text; coerced to string.
 * @returns {{start: Date|null, end: Date|null}} Both null when the start
 *   cannot be parsed; `end` falls back to `start` when only the end is unparseable.
 */
function parseDateRange(raw) {
  const s = String(raw ?? '')
    // Normalise dash variants so the range separator is always '-'.
    .replace(/&(?:ndash|mdash|#8211|#8212|#x2013|#x2014);/gi, '-')
    .replace(/[\u2010-\u2015\u2212]/g, '-')
    .replace(/\s+/g, ' ')
    .trim();
  if (!s) return { start: null, end: null };

  const currentYear = new Date().getFullYear();

  // Build a UTC date, rejecting missing parts and calendar roll-over (e.g. "Feb 30").
  const makeDate = (year, month, day) => {
    if (year == null || month == null || day == null || Number.isNaN(day)) return null;
    const dt = new Date(Date.UTC(year, month, day));
    return dt.getUTCMonth() === month && dt.getUTCDate() === day ? dt : null;
  };

  const sep = s.indexOf('-');
  if (sep === -1) {
    // Single date. (?!\d) stops a year such as "2025" from being read as day 20.
    const m = /([A-Za-z]{3,})\s+(\d{1,2})(?!\d)(?:,?\s*(\d{4}))?/.exec(s);
    if (!m) return { start: null, end: null };
    const year = m[3] ? parseInt(m[3], 10) : currentYear;
    // An unrecognised month yields null rather than silently becoming January.
    const d = makeDate(year, parseMonth(m[1]), parseInt(m[2], 10));
    return { start: d, end: d };
  }

  // Split on the first dash only; anything after it belongs to the right side.
  const left = s.slice(0, sep).trim();
  const right = s.slice(sep + 1).trim();

  // Left side: month is required, year is optional.
  const lm = /([A-Za-z]{3,})\s*(\d{1,2})(?!\d)(?:(?:,\s*|\s+)(\d{4}))?/.exec(left);
  const lMonth = lm ? parseMonth(lm[1]) : null;
  const lDay = lm ? parseInt(lm[2], 10) : null;
  const lYear = lm?.[3] ? parseInt(lm[3], 10) : null;

  // Right side: month and year are both optional and inherit from the left.
  const rm = /(?:([A-Za-z]{3,})\s*)?(?<!\d)(\d{1,2})(?!\d)(?:(?:,\s*|\s+)(\d{4}))?/.exec(right);
  const rMonth = rm?.[1] ? parseMonth(rm[1]) : lMonth;
  const rDay = rm ? parseInt(rm[2], 10) : lDay;
  const rYearExplicit = rm?.[3] ? parseInt(rm[3], 10) : null;

  const endYear = rYearExplicit ?? lYear ?? currentYear;
  const startYear = lYear ?? endYear;

  let start = makeDate(startYear, lMonth, lDay);
  let end = makeDate(endYear, rMonth, rDay) ?? start;

  // Year roll-over: a range like "Dec 29 - Jan 2" crosses New Year, so the
  // side whose year was inferred must be shifted to keep start <= end.
  if (start && end && end < start) {
    if (lYear == null && rYearExplicit != null) {
      start = makeDate(endYear - 1, lMonth, lDay) ?? start;
    } else if (rYearExplicit == null) {
      end = makeDate(endYear + 1, rMonth, rDay) ?? end;
    }
  }

  return { start, end };
}
|
||||
|
||||
/**
 * Return the substring of `html` from `startIdx` (inclusive) to `endIdx` (exclusive).
 *
 * Index semantics are those of String.prototype.slice: negative values count
 * from the end and an omitted `endIdx` means "to the end of the string".
 *
 * @param {*} html - Source text; null/undefined are treated as an empty string.
 * @param {number} startIdx - Start offset.
 * @param {number} [endIdx] - End offset.
 * @returns {string} The extracted substring.
 */
function textBetween(html, startIdx, endIdx) {
  // Coerce like stripTags does so a missing document yields '' instead of a TypeError.
  return String(html ?? '').slice(startIdx, endIdx);
}
|
||||
|
||||
/**
 * Reduce an HTML fragment to plain text.
 *
 * Tags are replaced by a space, character references are decoded (a small
 * set of named entities plus decimal/hex numeric references), runs of
 * whitespace collapse to one space, and the result is trimmed.
 *
 * @param {*} s - HTML fragment; falsy values yield an empty string.
 * @returns {string} Plain text.
 */
function stripTags(s) {
  const named = {
    nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'",
    ndash: '\u2013', mdash: '\u2014',
  };
  return String(s || '')
    .replace(/<[^>]*>/g, ' ')
    // Decode after tag removal so "&lt;b&gt;" survives as literal text, and in
    // a single pass so "&amp;lt;" is not decoded twice.
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
      if (body[0] === '#') {
        const isHex = body[1] === 'x' || body[1] === 'X';
        const code = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // Leave out-of-range references untouched rather than throwing.
        return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : entity;
      }
      const key = body.toLowerCase();
      return Object.prototype.hasOwnProperty.call(named, key) ? named[key] : entity;
    })
    // \s also matches U+00A0, so decoded non-breaking spaces collapse here.
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Extract events from the HTML of an event-list page.
 *
 * The first <table> after the `<div class="elementor-shortcode">` marker is
 * used (or the first table in the document when the marker is absent). Each
 * row is read as: date | name (optionally linked) | location | country.
 * Rows with fewer than three <td> cells (such as header rows), rows without
 * a name, and rows whose date cannot be parsed are skipped.
 *
 * @param {*} html - Page HTML; null/undefined are treated as an empty document.
 * @returns {Array<{name: string, startDate: Date, endDate: Date,
 *   location: string|null, country: string|null, sourceUrl: string|null}>}
 */
function parseEventsFromList(html) {
  const doc = String(html ?? '');

  // Locate the first table inside the elementor shortcode block.
  const markerIdx = doc.indexOf('<div class="elementor-shortcode">');
  const from = markerIdx !== -1 ? markerIdx : 0;
  const tblStart = doc.indexOf('<table', from);
  if (tblStart === -1) return [];
  const tblEnd = doc.indexOf('</table>', tblStart);
  if (tblEnd === -1) return [];
  const tableHtml = textBetween(doc, tblStart, tblEnd + '</table>'.length);

  // Everything before the first <tr> is the <table ...> opener, hence slice(1).
  const rows = tableHtml.split(/<tr\b[^>]*>/i).slice(1);
  const out = [];
  for (const row of rows) {
    // Closing tag is matched case-insensitively, the same as the opening tag.
    const cells = row
      .split(/<td\b[^>]*>/i)
      .slice(1)
      .map((c) => c.split(/<\/td\s*>/i)[0]);
    if (cells.length < 3) continue;

    const dateCell = stripTags(cells[0]);
    const nameCell = cells[1];
    const locCell = stripTags(cells[2]);
    const countryCell = stripTags(cells[3] || '');

    // Accept either quote style; [\s\S] lets the link text span several lines.
    const linkMatch = /<a\b[^>]*?\shref\s*=\s*(["'])(.+?)\1[^>]*>([\s\S]*?)<\/a>/i.exec(nameCell);
    // Attribute values are entity-encoded in HTML, so restore literal '&'.
    const sourceUrl = linkMatch ? linkMatch[2].replace(/&amp;/gi, '&') : null;
    // Fall back to the whole cell when the link itself carries no text.
    const name = (linkMatch && stripTags(linkMatch[3])) || stripTags(nameCell);

    const { start, end } = parseDateRange(dateCell);
    if (!name || !start) continue;

    out.push({
      name,
      startDate: start,
      endDate: end || start,
      location: locCell || null,
      country: countryCell || null,
      sourceUrl,
    });
  }
  return out;
}
|
||||
|
||||
async function importWorldsdc({ since = null, until = null, limit, dryRun = false, source = 'auto', updateMissingLocation = false } = {}) {
|
||||
const { prisma } = require('../../utils/db');
|
||||
const url = 'https://www.worldsdc.com/events/calendar/';
|
||||
const html = await fetchUrl(url);
|
||||
const events = parseEventsFromCalendar(html);
|
||||
let events = [];
|
||||
if (source === 'calendar' || source === 'auto') {
|
||||
try {
|
||||
const calHtml = await fetchUrl('https://www.worldsdc.com/events/calendar/');
|
||||
events = parseEventsFromCalendar(calHtml);
|
||||
} catch (_) { /* ignore */ }
|
||||
}
|
||||
if ((source === 'list' || source === 'auto') && events.length === 0) {
|
||||
try {
|
||||
const listHtml = await fetchUrl('https://www.worldsdc.com/events/');
|
||||
events = parseEventsFromList(listHtml);
|
||||
} catch (_) { /* ignore */ }
|
||||
}
|
||||
const normalizeDate = (s) => new Date(s.split('T')[0] || s);
|
||||
let list = events.map((e) => ({
|
||||
name: e.name.trim(),
|
||||
startDate: normalizeDate(e.startDate),
|
||||
endDate: e.endDate ? normalizeDate(e.endDate) : normalizeDate(e.startDate),
|
||||
location: null, // not provided by calendar source
|
||||
startDate: e.startDate instanceof Date ? e.startDate : normalizeDate(e.startDate),
|
||||
endDate: e.endDate ? (e.endDate instanceof Date ? e.endDate : normalizeDate(e.endDate)) : (e.startDate instanceof Date ? e.startDate : normalizeDate(e.startDate)),
|
||||
location: e.location || 'Unknown',
|
||||
country: e.country || null,
|
||||
sourceUrl: e.sourceUrl || null,
|
||||
}));
|
||||
const fetched = list.length;
|
||||
@@ -69,14 +173,24 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals
|
||||
if (limit) list = list.slice(0, limit);
|
||||
|
||||
const created = [];
|
||||
const updated = [];
|
||||
const skipped = [];
|
||||
for (const e of list) {
|
||||
const existing = await prisma.event.findFirst({
|
||||
where: { name: e.name, startDate: e.startDate },
|
||||
select: { id: true },
|
||||
select: { id: true, location: true },
|
||||
});
|
||||
if (existing) {
|
||||
skipped.push({ ...e, reason: 'exists' });
|
||||
if (updateMissingLocation && e.location && (!existing.location || existing.location === 'Unknown')) {
|
||||
if (dryRun) {
|
||||
updated.push({ ...e, id: existing.id, oldLocation: existing.location || null, newLocation: e.location });
|
||||
} else {
|
||||
await prisma.event.update({ where: { id: existing.id }, data: { location: e.location } });
|
||||
updated.push({ ...e, id: existing.id, oldLocation: existing.location || null, newLocation: e.location });
|
||||
}
|
||||
} else {
|
||||
skipped.push({ ...e, reason: 'exists' });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (dryRun) {
|
||||
@@ -86,7 +200,7 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals
|
||||
const saved = await prisma.event.create({
|
||||
data: {
|
||||
name: e.name,
|
||||
location: 'Unknown',
|
||||
location: e.location || 'Unknown',
|
||||
startDate: e.startDate,
|
||||
endDate: e.endDate,
|
||||
description: null,
|
||||
@@ -96,8 +210,7 @@ async function importWorldsdc({ since = null, until = null, limit, dryRun = fals
|
||||
});
|
||||
created.push({ ...e, id: saved.id, slug: saved.slug });
|
||||
}
|
||||
return { fetched, considered: list.length, created, skipped };
|
||||
return { fetched, considered: list.length, created, updated, skipped };
|
||||
}
|
||||
|
||||
module.exports = { parseEventsFromCalendar, importWorldsdc };
|
||||
|
||||
module.exports = { parseEventsFromCalendar, parseEventsFromList, importWorldsdc };
|
||||
|
||||
Reference in New Issue
Block a user