Add duplicate detection with fuzzy matching and fix artist metadata extraction
This commit is contained in:
@@ -462,6 +462,16 @@ export default function AdminPage() {
|
|||||||
song: data.song,
|
song: data.song,
|
||||||
validation: data.validation
|
validation: data.validation
|
||||||
});
|
});
|
||||||
|
} else if (res.status === 409) {
|
||||||
|
// Duplicate detected
|
||||||
|
const data = await res.json();
|
||||||
|
results.push({
|
||||||
|
filename: file.name,
|
||||||
|
success: false,
|
||||||
|
isDuplicate: true,
|
||||||
|
duplicate: data.duplicate,
|
||||||
|
error: `Duplicate: Already exists as "${data.duplicate.title}" by "${data.duplicate.artist}"`
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
results.push({
|
results.push({
|
||||||
filename: file.name,
|
filename: file.name,
|
||||||
@@ -486,12 +496,24 @@ export default function AdminPage() {
|
|||||||
|
|
||||||
// Auto-trigger categorization after uploads
|
// Auto-trigger categorization after uploads
|
||||||
const successCount = results.filter(r => r.success).length;
|
const successCount = results.filter(r => r.success).length;
|
||||||
|
const duplicateCount = results.filter(r => r.isDuplicate).length;
|
||||||
|
const failedCount = results.filter(r => !r.success && !r.isDuplicate).length;
|
||||||
if (successCount > 0) {
|
if (successCount > 0) {
|
||||||
setMessage(`✅ Uploaded ${successCount}/${files.length} songs successfully!\n\n🤖 Starting auto-categorization...`);
|
let msg = `✅ Uploaded ${successCount}/${files.length} songs successfully!`;
|
||||||
|
if (duplicateCount > 0) {
|
||||||
|
msg += `\n⚠️ Skipped ${duplicateCount} duplicate(s)`;
|
||||||
|
}
|
||||||
|
if (failedCount > 0) {
|
||||||
|
msg += `\n❌ ${failedCount} failed`;
|
||||||
|
}
|
||||||
|
msg += '\n\n🤖 Starting auto-categorization...';
|
||||||
|
setMessage(msg);
|
||||||
// Small delay to let user see the message
|
// Small delay to let user see the message
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
handleAICategorization();
|
handleAICategorization();
|
||||||
}, 1000);
|
}, 1000);
|
||||||
|
} else if (duplicateCount > 0 && failedCount === 0) {
|
||||||
|
setMessage(`⚠️ All ${duplicateCount} file(s) were duplicates - nothing uploaded.`);
|
||||||
} else {
|
} else {
|
||||||
setMessage(`❌ All uploads failed.`);
|
setMessage(`❌ All uploads failed.`);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import { PrismaClient } from '@prisma/client';
|
|||||||
import { writeFile, unlink } from 'fs/promises';
|
import { writeFile, unlink } from 'fs/promises';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { parseBuffer } from 'music-metadata';
|
import { parseBuffer } from 'music-metadata';
|
||||||
|
import { isDuplicateSong } from '@/lib/fuzzyMatch';
|
||||||
|
|
||||||
const prisma = new PrismaClient();
|
const prisma = new PrismaClient();
|
||||||
|
|
||||||
@@ -72,8 +73,16 @@ export async function POST(request: Request) {
|
|||||||
if (metadata.common.title) {
|
if (metadata.common.title) {
|
||||||
title = metadata.common.title;
|
title = metadata.common.title;
|
||||||
}
|
}
|
||||||
if (metadata.common.artist) {
|
|
||||||
|
// Handle artist - prefer artists array if available
|
||||||
|
if (metadata.common.artists && metadata.common.artists.length > 0) {
|
||||||
|
// Join multiple artists with '/'
|
||||||
|
artist = metadata.common.artists.join('/');
|
||||||
|
} else if (metadata.common.artist) {
|
||||||
artist = metadata.common.artist;
|
artist = metadata.common.artist;
|
||||||
|
} else if (metadata.common.albumartist) {
|
||||||
|
// Fallback to album artist
|
||||||
|
artist = metadata.common.albumartist;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validation info
|
// Validation info
|
||||||
@@ -114,6 +123,28 @@ export async function POST(request: Request) {
|
|||||||
if (!title) title = 'Unknown Title';
|
if (!title) title = 'Unknown Title';
|
||||||
if (!artist) artist = 'Unknown Artist';
|
if (!artist) artist = 'Unknown Artist';
|
||||||
|
|
||||||
|
// Check for duplicates
|
||||||
|
const existingSongs = await prisma.song.findMany({
|
||||||
|
select: { id: true, title: true, artist: true, filename: true }
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const existing of existingSongs) {
|
||||||
|
if (isDuplicateSong(artist, title, existing.artist, existing.title)) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{
|
||||||
|
error: 'Duplicate song detected',
|
||||||
|
duplicate: {
|
||||||
|
id: existing.id,
|
||||||
|
title: existing.title,
|
||||||
|
artist: existing.artist,
|
||||||
|
filename: existing.filename
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{ status: 409 }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Create URL-safe filename
|
// Create URL-safe filename
|
||||||
const originalName = file.name.replace(/\.mp3$/i, '');
|
const originalName = file.name.replace(/\.mp3$/i, '');
|
||||||
const sanitizedName = originalName
|
const sanitizedName = originalName
|
||||||
|
|||||||
97
lib/fuzzyMatch.ts
Normal file
97
lib/fuzzyMatch.ts
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
/**
|
||||||
|
* Fuzzy string matching utility for duplicate detection
|
||||||
|
* Uses Levenshtein distance to compare strings with tolerance for formatting variations
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Normalize a string for fuzzy comparison.
 *
 * Steps, in order:
 * - Apply Unicode compatibility decomposition (NFKD) so an accented letter is
 *   split into its base letter plus combining marks
 * - Convert to lowercase
 * - Remove every character that is not a letter, a digit, or whitespace. This
 *   drops punctuation, symbols, and the combining marks produced by NFKD, so
 *   "Beyoncé" becomes "beyonce"
 * - Collapse runs of whitespace into a single space and trim both ends
 *
 * Letters and digits are matched with Unicode property escapes instead of
 * `[a-z0-9]`, so text in non-Latin scripts is kept. With an ASCII-only class,
 * every non-Latin string would normalize to the same empty string and
 * unrelated titles would compare as equal.
 *
 * @param str Raw string to normalize
 * @returns The normalized string; empty when `str` has no letters or digits
 */
function normalizeString(str: string): string {
  return str
    .normalize('NFKD')
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, '') // Remove special chars and combining marks
    .replace(/\s+/g, ' ') // Normalize whitespace
    .trim();
}
|
||||||
|
|
||||||
|
/**
 * Calculate the Levenshtein distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions, and substitutions needed to turn one string into the other.
 * Characters are compared as UTF-16 code units, i.e. by string index.
 *
 * Only two rows of the dynamic-programming table are kept, and they are sized
 * by the shorter input, so memory use is O(min(|a|, |b|)) instead of a full
 * |a| x |b| matrix. The distance is symmetric, so swapping the inputs to put
 * the shorter one on the row axis does not change the result.
 *
 * @param a First string
 * @param b Second string
 * @returns The edit distance; 0 when the strings are identical
 */
function levenshteinDistance(a: string, b: string): number {
  // Keep the shorter string on the row axis to minimize the row length.
  const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a];

  if (shorter.length === 0) return longer.length;

  // previousRow[j] = distance between the first j chars of `shorter` and the
  // prefix of `longer` processed so far (initially the empty prefix).
  let previousRow: number[] = Array.from({ length: shorter.length + 1 }, (_, j) => j);
  let currentRow: number[] = new Array<number>(shorter.length + 1).fill(0);

  for (let i = 1; i <= longer.length; i++) {
    // Turning i chars of `longer` into an empty string takes i deletions.
    currentRow[0] = i;

    for (let j = 1; j <= shorter.length; j++) {
      const substitutionCost = longer.charAt(i - 1) === shorter.charAt(j - 1) ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j - 1] + substitutionCost, // substitution (or match)
        currentRow[j - 1] + 1, // insertion
        previousRow[j] + 1 // deletion
      );
    }

    // Reuse the two buffers instead of allocating a new row per iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  return previousRow[shorter.length];
}
|
||||||
|
|
||||||
|
/**
 * Check whether two strings are similar based on Levenshtein distance.
 *
 * Both strings are normalized with `normalizeString` first. Similarity is
 * `1 - distance / maxLen`, where `distance` is the Levenshtein distance between
 * the normalized strings and `maxLen` is the length of the longer one.
 *
 * @param str1 First string to compare
 * @param str2 Second string to compare
 * @param threshold Similarity threshold (0-1), default 0.85
 * @returns true if the strings are similar enough; false if either input is empty
 */
export function isSimilar(str1: string, str2: string, threshold = 0.85): boolean {
  if (!str1 || !str2) return false;

  const norm1 = normalizeString(str1);
  const norm2 = normalizeString(str2);

  // Normalization can strip a string down to nothing (e.g. a title made only
  // of punctuation). Two empty results carry no information, so treating them
  // as an "exact match" would pair up unrelated strings. Compare the raw text
  // instead, ignoring only case and surrounding whitespace.
  if (norm1.length === 0 || norm2.length === 0) {
    return str1.trim().toLowerCase() === str2.trim().toLowerCase();
  }

  // Exact match after normalization
  if (norm1 === norm2) return true;

  const distance = levenshteinDistance(norm1, norm2);
  // Both strings are non-empty here, so maxLen is at least 1.
  const maxLen = Math.max(norm1.length, norm2.length);
  const similarity = 1 - distance / maxLen;

  return similarity >= threshold;
}
|
||||||
|
|
||||||
|
/**
 * Decide whether two songs, each given as an artist/title pair, are duplicates.
 *
 * A pair counts as a duplicate only when the artists are similar AND the
 * titles are similar, each judged by `isSimilar` with the same threshold.
 *
 * @param artist1 Artist of the first song
 * @param title1 Title of the first song
 * @param artist2 Artist of the second song
 * @param title2 Title of the second song
 * @param threshold Similarity threshold passed to `isSimilar`, default 0.85
 * @returns true when both artist and title match
 */
export function isDuplicateSong(
  artist1: string,
  title1: string,
  artist2: string,
  title2: string,
  threshold = 0.85
): boolean {
  // Different artists rule out a duplicate, so the titles need not be compared.
  if (!isSimilar(artist1, artist2, threshold)) {
    return false;
  }

  return isSimilar(title1, title2, threshold);
}
|
||||||
Reference in New Issue
Block a user