// Package in the originating patch: me.chayapak1.chomens_bot.util (declaration omitted here).
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Turns a small subset of HTML into plain text decorated with section-sign
 * (U+00A7) formatting codes.
 *
 * <p>Recognised tags: {@code b}, {@code i}, {@code u}, {@code code}, {@code dt},
 * {@code dd}, {@code ul}, {@code ol}, {@code li}, {@code br} and {@code p}.
 * Every other tag, every HTML comment and every raw newline is removed from the
 * output without a replacement.
 *
 * <p>The visible caller (WikipediaCommand) feeds the "extract" field of a
 * Wikipedia API response into {@link #toFormattingCodes(String)} and reports
 * "No contents found" when the result is {@code null}.
 *
 * <p>NOTE(review): the codes look like legacy Minecraft chat formatting codes;
 * confirm that whatever renders the returned string interprets them that way.
 */
// https://github.com/Earthcomputer/clientcommands/blob/fabric/src/main/java/net/earthcomputer/clientcommands/features/WikiRetriever.java
public class HTMLUtilities {
    // The comment delimiters and the entity names below are spelled with Unicode
    // escapes (U+003C, U+003E, U+0026) so that HTML-aware tooling which strips
    // comments or decodes entities cannot silently rewrite these literals.
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile(
            "<\\s*(/)?\\s*(\\w+).*?>"             // a tag; group 1 = closing slash, group 2 = tag name
                    + "|\u003C!--.*?--\u003E"     // an HTML comment (no groups set)
                    + "|\n",                      // a raw newline (no groups set)
            Pattern.DOTALL
    );

    private static final String BOLD = "\u00A7l";
    private static final String ITALIC = "\u00A7o";
    private static final String UNDERLINE = "\u00A7n";
    private static final String CODE_COLOR = "\u00A72";
    private static final String DEFAULT_COLOR = "\u00A7a";

    // Applied in order. The ampersand entity must stay last: decoding it earlier
    // would turn double-escaped text into a fresh entity and unescape it twice.
    private static final String[][] ENTITIES = {
            {"\u0026quot;", "\""},
            {"\u0026#39;", "'"},
            {"\u0026apos;", "'"},
            {"\u0026lt;", "<"},
            {"\u0026gt;", ">"},
            {"\u0026amp;", "&"},
    };

    /**
     * Converts {@code html} to text with formatting codes.
     *
     * <p>Entities are decoded only after the tags have been processed, so
     * escaped markup in the input ends up as literal text instead of being
     * interpreted as a tag.
     *
     * @param html the HTML fragment to convert; may be {@code null}
     * @return the converted text, or {@code null} when {@code html} is
     *         {@code null} or nothing is left after the tags were processed
     */
    public static String toFormattingCodes (String html) {
        if (html == null) {
            return null;
        }

        final Matcher matcher = HTML_TAG_PATTERN.matcher(html);
        final StringBuilder raw = new StringBuilder();
        final FormatState state = new FormatState();

        while (matcher.find()) {
            // Copies the text before the match and drops the match itself.
            matcher.appendReplacement(raw, "");

            final String tagName = matcher.group(2);
            if (tagName == null) {
                // matched a comment or a newline: removed, nothing to emit
                continue;
            }

            final String tag = tagName.toLowerCase(Locale.ROOT);
            if (matcher.group(1) == null) {
                openTag(tag, state, raw);
            } else {
                closeTag(tag, state, raw);
            }
        }
        matcher.appendTail(raw);

        if (raw.length() == 0) {
            return null;
        }

        return decodeEntities(raw.toString());
    }

    /** Emits the output for an opening tag and records the style it switches on. */
    private static void openTag (String tag, FormatState state, StringBuilder out) {
        switch (tag) {
            case "b" -> {
                out.append(BOLD);
                state.bold = true;
            }
            case "i" -> {
                out.append(ITALIC);
                state.italic = true;
            }
            case "u", "dt" -> {
                out.append(UNDERLINE);
                state.underline = true;
            }
            case "code" -> {
                state.code = true;
                state.appendColorAndStyles(out);
            }
            case "dd" -> out.append(" ");
            case "ul" -> state.listIndex = 0;
            case "ol" -> state.listIndex = 1;
            case "li" -> {
                if (state.listIndex >= 1) {
                    out.append(" ").append(state.listIndex).append(". ");
                    state.listIndex++;
                } else {
                    out.append(" \u2022 ");
                }
            }
            case "br" -> out.append("\n");
            default -> {
                // unsupported tag: stripped without a replacement
            }
        }
    }

    /** Emits the output for a closing tag and records the style it switches off. */
    private static void closeTag (String tag, FormatState state, StringBuilder out) {
        switch (tag) {
            case "b" -> {
                state.bold = false;
                state.appendColorAndStyles(out);
            }
            case "i" -> {
                state.italic = false;
                state.appendColorAndStyles(out);
            }
            case "dt" -> {
                // a definition term ends its line before the underline is dropped
                out.append("\n");
                state.underline = false;
                state.appendColorAndStyles(out);
            }
            case "u" -> {
                state.underline = false;
                state.appendColorAndStyles(out);
            }
            case "code" -> {
                state.code = false;
                state.appendColorAndStyles(out);
            }
            case "ul", "ol" -> state.listIndex = -1;
            case "dd", "li", "br", "p" -> out.append("\n");
            default -> {
                // unsupported tag: stripped without a replacement
            }
        }
    }

    /** Replaces the entities listed in {@link #ENTITIES} with their characters. */
    private static String decodeEntities (String text) {
        String decoded = text;
        for (final String[] entity : ENTITIES) {
            decoded = decoded.replace(entity[0], entity[1]);
        }
        return decoded;
    }

    /** Mutable record of which styles are active while the input is scanned. */
    private static final class FormatState {
        boolean bold;
        boolean italic;
        boolean underline;
        boolean code;

        // -1 for not in list, 0 for unordered list, >= 1 for ordered list
        int listIndex = -1;

        /**
         * Appends the colour for the current context followed by every style
         * that is still active, always in the order bold, italic, underline.
         *
         * <p>NOTE(review): the styles are presumably re-emitted because a colour
         * code clears earlier formatting in the target renderer; confirm.
         */
        void appendColorAndStyles (StringBuilder out) {
            out.append(code ? CODE_COLOR : DEFAULT_COLOR);
            if (bold) {
                out.append(BOLD);
            }
            if (italic) {
                out.append(ITALIC);
            }
            if (underline) {
                out.append(UNDERLINE);
            }
        }
    }
}