forked from Mystique/Mystique
Move cultural groups to regex to fix mismatches
This commit is contained in:
parent
467535ca84
commit
ef85c73afb
1 changed file with 46 additions and 31 deletions
|
@ -1,24 +1,30 @@
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
|
|
||||||
// Define cultural groups by country
|
// Define cultural groups by country with more precise matching
|
||||||
const culturalGroups = {
|
const culturalGroups = {
|
||||||
anglosphere: [
|
anglosphere: [
|
||||||
'united kingdom', 'uk', 'britain', 'england', 'scotland', 'wales', 'northern ireland',
|
'united kingdom', 'uk', 'great britain', 'britain',
|
||||||
'united states', 'usa', 'america', 'canada', 'australia', 'new zealand', 'ireland'
|
'england', 'scotland', 'wales', 'northern ireland',
|
||||||
|
'^united states$', '^usa$', '^us$', '^america$',
|
||||||
|
'^canada$',
|
||||||
|
'^australia$',
|
||||||
|
'^new zealand$',
|
||||||
|
'^ireland$'
|
||||||
],
|
],
|
||||||
francophone: [
|
francophone: [
|
||||||
'france', 'belgium', 'switzerland', 'quebec', 'monaco',
|
'^france$', '^belgium$', '^switzerland$', '^quebec$', '^monaco$',
|
||||||
'luxembourg', 'haiti', 'ivory coast', 'senegal', 'cameroon'
|
'^luxembourg$', '^haiti$', '^ivory coast$', '^senegal$', '^cameroon$'
|
||||||
],
|
],
|
||||||
hispanic: [
|
hispanic: [
|
||||||
'spain', 'mexico', 'argentina', 'chile', 'colombia', 'peru',
|
'^spain$', '^mexico$', '^argentina$', '^chile$', '^colombia$', '^peru$',
|
||||||
'venezuela', 'ecuador', 'guatemala', 'cuba', 'dominican republic',
|
'^venezuela$', '^ecuador$', '^guatemala$', '^cuba$', '^dominican republic$',
|
||||||
'honduras', 'el salvador', 'nicaragua', 'costa rica', 'panama'
|
'^honduras$', '^el salvador$', '^nicaragua$', '^costa rica$', '^panama$',
|
||||||
|
'^bolivia$', '^paraguay$', '^uruguay$', 'latin america'
|
||||||
],
|
],
|
||||||
lusophone: [
|
lusophone: [
|
||||||
'portugal', 'brazil', 'angola', 'mozambique',
|
'^portugal$', '^brazil$', '^angola$', '^mozambique$',
|
||||||
'cape verde', 'guinea-bissau', 'sao tome and principe'
|
'^cape verde$', '^guinea-bissau$', '^sao tome and principe$'
|
||||||
],
|
],
|
||||||
arabic: [
|
arabic: [
|
||||||
'saudi arabia', 'egypt', 'uae', 'united arab emirates', 'qatar',
|
'saudi arabia', 'egypt', 'uae', 'united arab emirates', 'qatar',
|
||||||
|
@ -27,54 +33,54 @@ const culturalGroups = {
|
||||||
'algeria', 'morocco', 'sudan'
|
'algeria', 'morocco', 'sudan'
|
||||||
],
|
],
|
||||||
germanosphere: [
|
germanosphere: [
|
||||||
'germany', 'austria', 'switzerland', 'luxembourg', 'liechtenstein'
|
'^germany$', '^austria$', '^switzerland$', '^luxembourg$', '^liechtenstein$'
|
||||||
],
|
],
|
||||||
slavic: [
|
slavic: [
|
||||||
'russia', 'ukraine', 'belarus', 'poland', 'czech republic',
|
'^russia$', '^ukraine$', '^belarus$', '^poland$', '^czech republic$',
|
||||||
'slovakia', 'serbia', 'croatia', 'bosnia', 'montenegro',
|
'^slovakia$', '^serbia$', '^croatia$', '^bosnia$', '^montenegro$',
|
||||||
'slovenia', 'bulgaria', 'north macedonia'
|
'^slovenia$', '^bulgaria$', '^north macedonia$'
|
||||||
],
|
],
|
||||||
sinosphere: [
|
sinosphere: [
|
||||||
'china', 'hong kong', 'taiwan', 'singapore', 'macau'
|
'^china$', 'hong kong', '^taiwan$', '^singapore$', '^macau$'
|
||||||
],
|
],
|
||||||
indosphere: [
|
indosphere: [
|
||||||
'india', 'pakistan', 'bangladesh', 'nepal', 'sri lanka',
|
'^india$', '^pakistan$', '^bangladesh$', '^nepal$', '^sri lanka$',
|
||||||
'bhutan', 'maldives'
|
'^bhutan$', '^maldives$'
|
||||||
],
|
],
|
||||||
turkic: [
|
turkic: [
|
||||||
'turkey', 'azerbaijan', 'uzbekistan', 'kazakhstan',
|
'^turkey$', '^azerbaijan$', '^uzbekistan$', '^kazakhstan$',
|
||||||
'kyrgyzstan', 'turkmenistan'
|
'^kyrgyzstan$', '^turkmenistan$'
|
||||||
],
|
],
|
||||||
nordic: [
|
nordic: [
|
||||||
'sweden', 'norway', 'denmark', 'finland', 'iceland',
|
'^sweden$', '^norway$', '^denmark$', '^finland$', '^iceland$',
|
||||||
'faroe islands', 'greenland'
|
'faroe islands', '^greenland$'
|
||||||
],
|
],
|
||||||
baltic: [
|
baltic: [
|
||||||
'estonia', 'latvia', 'lithuania'
|
'^estonia$', '^latvia$', '^lithuania$'
|
||||||
],
|
],
|
||||||
hellenic: [
|
hellenic: [
|
||||||
'greece', 'cyprus'
|
'^greece$', '^cyprus$'
|
||||||
],
|
],
|
||||||
benelux: [
|
benelux: [
|
||||||
'netherlands', 'belgium', 'luxembourg'
|
'^netherlands$', '^belgium$', '^luxembourg$'
|
||||||
],
|
],
|
||||||
persian: [
|
persian: [
|
||||||
'iran', 'afghanistan', 'tajikistan'
|
'^iran$', '^afghanistan$', '^tajikistan$'
|
||||||
],
|
],
|
||||||
malaysphere: [
|
malaysphere: [
|
||||||
'malaysia', 'brunei', 'indonesia'
|
'^malaysia$', '^brunei$', '^indonesia$'
|
||||||
],
|
],
|
||||||
korean: [
|
korean: [
|
||||||
'south korea', 'korea', 'north korea'
|
'south korea', 'korea', 'north korea'
|
||||||
],
|
],
|
||||||
japanese: [
|
japanese: [
|
||||||
'japan'
|
'^japan$'
|
||||||
],
|
],
|
||||||
vietnamese: [
|
vietnamese: [
|
||||||
'vietnam'
|
'^vietnam$'
|
||||||
],
|
],
|
||||||
thai: [
|
thai: [
|
||||||
'thailand'
|
'^thailand$'
|
||||||
]
|
]
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -85,12 +91,21 @@ function getCulturalGroup(channelInfo) {
|
||||||
|
|
||||||
// Check if the country belongs to any cultural group
|
// Check if the country belongs to any cultural group
|
||||||
for (const [group, countries] of Object.entries(culturalGroups)) {
|
for (const [group, countries] of Object.entries(culturalGroups)) {
|
||||||
if (countries.some(country => groupTitle.includes(country))) {
|
// Use exact matching with RegExp
|
||||||
|
if (countries.some(country => {
|
||||||
|
// If the country pattern starts with ^, use it as a RegExp
|
||||||
|
if (country.startsWith('^')) {
|
||||||
|
const regex = new RegExp(country, 'i');
|
||||||
|
return regex.test(groupTitle);
|
||||||
|
}
|
||||||
|
// Otherwise, fall back to substring matching via includes (applies to every pattern without a leading ^)
|
||||||
|
return groupTitle.includes(country);
|
||||||
|
})) {
|
||||||
return group;
|
return group;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null; // Return null instead of 'other' for non-matching channels
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function splitByCulturalGroup(filePath) {
|
function splitByCulturalGroup(filePath) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue