forked from Mystique/Mystique
Move cultural groups to regex to fix mismatches
This commit is contained in:
parent
467535ca84
commit
ef85c73afb
1 changed files with 46 additions and 31 deletions
|
@ -1,24 +1,30 @@
|
|||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Define cultural groups by country
|
||||
// Define cultural groups by country with more precise matching
|
||||
const culturalGroups = {
|
||||
anglosphere: [
|
||||
'united kingdom', 'uk', 'britain', 'england', 'scotland', 'wales', 'northern ireland',
|
||||
'united states', 'usa', 'america', 'canada', 'australia', 'new zealand', 'ireland'
|
||||
'united kingdom', 'uk', 'great britain', 'britain',
|
||||
'england', 'scotland', 'wales', 'northern ireland',
|
||||
'^united states$', '^usa$', '^us$', '^america$',
|
||||
'^canada$',
|
||||
'^australia$',
|
||||
'^new zealand$',
|
||||
'^ireland$'
|
||||
],
|
||||
francophone: [
|
||||
'france', 'belgium', 'switzerland', 'quebec', 'monaco',
|
||||
'luxembourg', 'haiti', 'ivory coast', 'senegal', 'cameroon'
|
||||
'^france$', '^belgium$', '^switzerland$', '^quebec$', '^monaco$',
|
||||
'^luxembourg$', '^haiti$', '^ivory coast$', '^senegal$', '^cameroon$'
|
||||
],
|
||||
hispanic: [
|
||||
'spain', 'mexico', 'argentina', 'chile', 'colombia', 'peru',
|
||||
'venezuela', 'ecuador', 'guatemala', 'cuba', 'dominican republic',
|
||||
'honduras', 'el salvador', 'nicaragua', 'costa rica', 'panama'
|
||||
'^spain$', '^mexico$', '^argentina$', '^chile$', '^colombia$', '^peru$',
|
||||
'^venezuela$', '^ecuador$', '^guatemala$', '^cuba$', '^dominican republic$',
|
||||
'^honduras$', '^el salvador$', '^nicaragua$', '^costa rica$', '^panama$',
|
||||
'^bolivia$', '^paraguay$', '^uruguay$', 'latin america'
|
||||
],
|
||||
lusophone: [
|
||||
'portugal', 'brazil', 'angola', 'mozambique',
|
||||
'cape verde', 'guinea-bissau', 'sao tome and principe'
|
||||
'^portugal$', '^brazil$', '^angola$', '^mozambique$',
|
||||
'^cape verde$', '^guinea-bissau$', '^sao tome and principe$'
|
||||
],
|
||||
arabic: [
|
||||
'saudi arabia', 'egypt', 'uae', 'united arab emirates', 'qatar',
|
||||
|
@ -27,54 +33,54 @@ const culturalGroups = {
|
|||
'algeria', 'morocco', 'sudan'
|
||||
],
|
||||
germanosphere: [
|
||||
'germany', 'austria', 'switzerland', 'luxembourg', 'liechtenstein'
|
||||
'^germany$', '^austria$', '^switzerland$', '^luxembourg$', '^liechtenstein$'
|
||||
],
|
||||
slavic: [
|
||||
'russia', 'ukraine', 'belarus', 'poland', 'czech republic',
|
||||
'slovakia', 'serbia', 'croatia', 'bosnia', 'montenegro',
|
||||
'slovenia', 'bulgaria', 'north macedonia'
|
||||
'^russia$', '^ukraine$', '^belarus$', '^poland$', '^czech republic$',
|
||||
'^slovakia$', '^serbia$', '^croatia$', '^bosnia$', '^montenegro$',
|
||||
'^slovenia$', '^bulgaria$', '^north macedonia$'
|
||||
],
|
||||
sinosphere: [
|
||||
'china', 'hong kong', 'taiwan', 'singapore', 'macau'
|
||||
'^china$', 'hong kong', '^taiwan$', '^singapore$', '^macau$'
|
||||
],
|
||||
indosphere: [
|
||||
'india', 'pakistan', 'bangladesh', 'nepal', 'sri lanka',
|
||||
'bhutan', 'maldives'
|
||||
'^india$', '^pakistan$', '^bangladesh$', '^nepal$', '^sri lanka$',
|
||||
'^bhutan$', '^maldives$'
|
||||
],
|
||||
turkic: [
|
||||
'turkey', 'azerbaijan', 'uzbekistan', 'kazakhstan',
|
||||
'kyrgyzstan', 'turkmenistan'
|
||||
'^turkey$', '^azerbaijan$', '^uzbekistan$', '^kazakhstan$',
|
||||
'^kyrgyzstan$', '^turkmenistan$'
|
||||
],
|
||||
nordic: [
|
||||
'sweden', 'norway', 'denmark', 'finland', 'iceland',
|
||||
'faroe islands', 'greenland'
|
||||
'^sweden$', '^norway$', '^denmark$', '^finland$', '^iceland$',
|
||||
'faroe islands', '^greenland$'
|
||||
],
|
||||
baltic: [
|
||||
'estonia', 'latvia', 'lithuania'
|
||||
'^estonia$', '^latvia$', '^lithuania$'
|
||||
],
|
||||
hellenic: [
|
||||
'greece', 'cyprus'
|
||||
'^greece$', '^cyprus$'
|
||||
],
|
||||
benelux: [
|
||||
'netherlands', 'belgium', 'luxembourg'
|
||||
'^netherlands$', '^belgium$', '^luxembourg$'
|
||||
],
|
||||
persian: [
|
||||
'iran', 'afghanistan', 'tajikistan'
|
||||
'^iran$', '^afghanistan$', '^tajikistan$'
|
||||
],
|
||||
malaysphere: [
|
||||
'malaysia', 'brunei', 'indonesia'
|
||||
'^malaysia$', '^brunei$', '^indonesia$'
|
||||
],
|
||||
korean: [
|
||||
'south korea', 'korea', 'north korea'
|
||||
],
|
||||
japanese: [
|
||||
'japan'
|
||||
'^japan$'
|
||||
],
|
||||
vietnamese: [
|
||||
'vietnam'
|
||||
'^vietnam$'
|
||||
],
|
||||
thai: [
|
||||
'thailand'
|
||||
'^thailand$'
|
||||
]
|
||||
};
|
||||
|
||||
|
@ -85,12 +91,21 @@ function getCulturalGroup(channelInfo) {
|
|||
|
||||
// Check if the country belongs to any cultural group
|
||||
for (const [group, countries] of Object.entries(culturalGroups)) {
|
||||
if (countries.some(country => groupTitle.includes(country))) {
|
||||
// Use exact matching with RegExp
|
||||
if (countries.some(country => {
|
||||
// If the country pattern starts with ^, use it as a RegExp
|
||||
if (country.startsWith('^')) {
|
||||
const regex = new RegExp(country, 'i');
|
||||
return regex.test(groupTitle);
|
||||
}
|
||||
// Otherwise, use includes for flexible matching (for multi-word countries)
|
||||
return groupTitle.includes(country);
|
||||
})) {
|
||||
return group;
|
||||
}
|
||||
}
|
||||
|
||||
return null; // Return null instead of 'other' for non-matching channels
|
||||
return null;
|
||||
}
|
||||
|
||||
function splitByCulturalGroup(filePath) {
|
||||
|
|
Loading…
Add table
Reference in a new issue