From 96f5f66f013b31a6861cbda0643d39e4afc45e26 Mon Sep 17 00:00:00 2001 From: ChomeNS <95471003+ChomeNS@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:24:23 +0700 Subject: [PATCH] fix: support Shift-JIS encoding for midi for japanese MIDIs --- build-number.txt | 2 +- .../chomens_bot/song/MidiConverter.java | 30 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/build-number.txt b/build-number.txt index 7255dbc5..7c4af730 100644 --- a/build-number.txt +++ b/build-number.txt @@ -1 +1 @@ -2974 \ No newline at end of file +2983 \ No newline at end of file diff --git a/src/main/java/me/chayapak1/chomens_bot/song/MidiConverter.java b/src/main/java/me/chayapak1/chomens_bot/song/MidiConverter.java index 591abc25..6766225a 100644 --- a/src/main/java/me/chayapak1/chomens_bot/song/MidiConverter.java +++ b/src/main/java/me/chayapak1/chomens_bot/song/MidiConverter.java @@ -5,7 +5,8 @@ import me.chayapak1.chomens_bot.Bot; import javax.sound.midi.*; import java.io.ByteArrayInputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; +import java.nio.ByteBuffer; +import java.nio.charset.*; import java.util.*; import static javax.sound.midi.ShortMessage.SYSTEM_RESET; @@ -51,7 +52,7 @@ public class MidiConverter implements Converter { if (mm.getType() == SET_TEMPO) { tempoEvents.add(event); } else if (mm.getType() == TRACK_NAME) { - final String stringTitle = new String(mm.getData(), StandardCharsets.UTF_8); + final String stringTitle = decodeStringWithUTF8OrShiftJIS(mm.getData()); if (stringTitle.isBlank()) continue; @@ -64,10 +65,10 @@ public class MidiConverter implements Converter { isFirst = false; } } else if (mm.getType() == TEXT) { - text.append(new String(mm.getData(), StandardCharsets.UTF_8)); + text.append(decodeStringWithUTF8OrShiftJIS(mm.getData())); text.append('\n'); } else if (mm.getType() == LYRICS) { - final String lyric = new String(mm.getMessage(), StandardCharsets.UTF_8); + final String lyric = 
decodeStringWithUTF8OrShiftJIS(mm.getMessage()); lyrics.put(event.getTick(), lyric); }
@@ -452,4 +453,25 @@ public class MidiConverter implements Converter {
         percussionMap.put(86, 14 + 25 * Instrument.BASEDRUM.id);
         percussionMap.put(87, 7 + 25 * Instrument.BASEDRUM.id);
     }
+
+    // Decodes MIDI meta-event text as strict UTF-8, falling back to Shift_JIS when the bytes are not valid
+    // UTF-8. Japanese MIDIs such as Night of Nights store their title as Shift_JIS; its title bytes are
+    // [-125, 105, -125, 67, -125, 103, -127, 69, -125, 73, -125, 117, -127, 69, -125, 105, -125, 67, -125, 99]
+    // which become "�i�C�g�E�I�u�E�i�C�c" as UTF-8 but "ナイト・オブ・ナイツ" as Shift_JIS.
+    // NOTE(review): callers should pass only the text payload (getData()). MidiMessage.getMessage(), which the
+    // LYRICS branch passes, starts with the status byte (0xFF for meta messages); that is never valid UTF-8.
+    private static String decodeStringWithUTF8OrShiftJIS (final byte[] bytes) {
+        final CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPORT) // report invalid bytes, so we can fall back
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+
+        try {
+            return strictUtf8.decode(ByteBuffer.wrap(bytes)).toString();
+        } catch (final CharacterCodingException ignored) {
+            // not UTF-8: use Shift_JIS, which not every JVM provides; degrade to lenient UTF-8, don't throw
+            final Charset fallback =
+                    Charset.isSupported("Shift_JIS") ? Charset.forName("Shift_JIS") : StandardCharsets.UTF_8;
+            return new String(bytes, fallback);
+        }
+    }
 }