diff --git a/.forgejo/scripts/cultural-groups.js b/.forgejo/scripts/cultural-groups.js index 8e6cd2b..a1a0d80 100644 --- a/.forgejo/scripts/cultural-groups.js +++ b/.forgejo/scripts/cultural-groups.js @@ -1,24 +1,30 @@ const fs = require('fs'); const path = require('path'); -// Define cultural groups by country +// Define cultural groups by country with more precise matching const culturalGroups = { anglosphere: [ - 'united kingdom', 'uk', 'britain', 'england', 'scotland', 'wales', 'northern ireland', - 'united states', 'usa', 'america', 'canada', 'australia', 'new zealand', 'ireland' + 'united kingdom', 'uk', 'great britain', 'britain', + 'england', 'scotland', 'wales', 'northern ireland', + '^united states$', '^usa$', '^us$', '^america$', + '^canada$', + '^australia$', + '^new zealand$', + '^ireland$' ], francophone: [ - 'france', 'belgium', 'switzerland', 'quebec', 'monaco', - 'luxembourg', 'haiti', 'ivory coast', 'senegal', 'cameroon' + '^france$', '^belgium$', '^switzerland$', '^quebec$', '^monaco$', + '^luxembourg$', '^haiti$', '^ivory coast$', '^senegal$', '^cameroon$' ], hispanic: [ - 'spain', 'mexico', 'argentina', 'chile', 'colombia', 'peru', - 'venezuela', 'ecuador', 'guatemala', 'cuba', 'dominican republic', - 'honduras', 'el salvador', 'nicaragua', 'costa rica', 'panama' + '^spain$', '^mexico$', '^argentina$', '^chile$', '^colombia$', '^peru$', + '^venezuela$', '^ecuador$', '^guatemala$', '^cuba$', '^dominican republic$', + '^honduras$', '^el salvador$', '^nicaragua$', '^costa rica$', '^panama$', + '^bolivia$', '^paraguay$', '^uruguay$', 'latin america' ], lusophone: [ - 'portugal', 'brazil', 'angola', 'mozambique', - 'cape verde', 'guinea-bissau', 'sao tome and principe' + '^portugal$', '^brazil$', '^angola$', '^mozambique$', + '^cape verde$', '^guinea-bissau$', '^sao tome and principe$' ], arabic: [ 'saudi arabia', 'egypt', 'uae', 'united arab emirates', 'qatar', @@ -27,54 +33,54 @@ const culturalGroups = { 'algeria', 'morocco', 'sudan' ], germanosphere: [ - 'germany', 'austria', 'switzerland', 'luxembourg', 'liechtenstein' + '^germany$', '^austria$', '^switzerland$', '^luxembourg$', '^liechtenstein$' ], slavic: [ - 'russia', 'ukraine', 'belarus', 'poland', 'czech republic', - 'slovakia', 'serbia', 'croatia', 'bosnia', 'montenegro', - 'slovenia', 'bulgaria', 'north macedonia' + '^russia$', '^ukraine$', '^belarus$', '^poland$', '^czech republic$', + '^slovakia$', '^serbia$', '^croatia$', '^bosnia$', '^montenegro$', + '^slovenia$', '^bulgaria$', '^north macedonia$' ], sinosphere: [ - 'china', 'hong kong', 'taiwan', 'singapore', 'macau' + '^china$', 'hong kong', '^taiwan$', '^singapore$', '^macau$' ], indosphere: [ - 'india', 'pakistan', 'bangladesh', 'nepal', 'sri lanka', - 'bhutan', 'maldives' + '^india$', '^pakistan$', '^bangladesh$', '^nepal$', '^sri lanka$', + '^bhutan$', '^maldives$' ], turkic: [ - 'turkey', 'azerbaijan', 'uzbekistan', 'kazakhstan', - 'kyrgyzstan', 'turkmenistan' + '^turkey$', '^azerbaijan$', '^uzbekistan$', '^kazakhstan$', + '^kyrgyzstan$', '^turkmenistan$' ], nordic: [ - 'sweden', 'norway', 'denmark', 'finland', 'iceland', - 'faroe islands', 'greenland' + '^sweden$', '^norway$', '^denmark$', '^finland$', '^iceland$', + 'faroe islands', '^greenland$' ], baltic: [ - 'estonia', 'latvia', 'lithuania' + '^estonia$', '^latvia$', '^lithuania$' ], hellenic: [ - 'greece', 'cyprus' + '^greece$', '^cyprus$' ], benelux: [ - 'netherlands', 'belgium', 'luxembourg' + '^netherlands$', '^belgium$', '^luxembourg$' ], persian: [ - 'iran', 'afghanistan', 'tajikistan' + '^iran$', '^afghanistan$', '^tajikistan$' ], malaysphere: [ - 'malaysia', 'brunei', 'indonesia' + '^malaysia$', '^brunei$', '^indonesia$' ], korean: [ 'south korea', 'korea', 'north korea' ], japanese: [ - 'japan' + '^japan$' ], vietnamese: [ - 'vietnam' + '^vietnam$' ], thai: [ - 'thailand' + '^thailand$' ] }; @@ -85,12 +91,21 @@ function getCulturalGroup(channelInfo) { // Check if the country belongs to any cultural group for (const [group, countries] of Object.entries(culturalGroups)) { - if (countries.some(country => groupTitle.includes(country))) { + // Use exact matching with RegExp + if (countries.some(country => { + // If the country pattern starts with ^, use it as a RegExp + if (country.startsWith('^')) { + const regex = new RegExp(country, 'i'); + return regex.test(groupTitle); + } + // Otherwise, use includes for flexible matching (for multi-word countries) + return groupTitle.includes(country); + })) { return group; } } - return null; // Return null instead of 'other' for non-matching channels + return null; } function splitByCulturalGroup(filePath) {