138 lines
4.8 KiB
Java
138 lines
4.8 KiB
Java
package jmri.util;
|
|
|
|
import java.io.*;
|
|
import java.nio.file.*;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.util.*;
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Stream;
|
|
|
|
import org.junit.jupiter.api.*;
|
|
|
|
/**
|
|
* Check help files for UTF-8 characters.
|
|
* Files that contain &l;tmeta charset="utf-8"> are exempt.
|
|
*
|
|
* @author Daniel Bergqvist Copyright (C) 2022
|
|
*/
|
|
public class CheckHelpFilesForUTF8Test {
|
|
|
|
private final Map<Integer, String> convertChar = new HashMap<>();
|
|
private final Set<Integer> foundChar = new HashSet<>();
|
|
private int numErrors = 0;
|
|
|
|
|
|
private void searchFolder(String folder) throws IOException {
|
|
Path path = FileSystems.getDefault().getPath(folder);
|
|
Set<String> files = Stream.of(path.toFile().listFiles())
|
|
.filter(file -> !file.isDirectory())
|
|
.map(File::getName)
|
|
.collect(Collectors.toSet());
|
|
|
|
for (String file : files) {
|
|
if (file.endsWith(".shtml")) {
|
|
String fileName = folder + file;
|
|
|
|
var lines = Files.readAllLines(Paths.get(fileName), StandardCharsets.UTF_8);
|
|
for (String s : lines) {
|
|
if (s.contains("<meta charset=\"utf-8\">")) break; // no further testing for UTF
|
|
s.codePoints().forEach((codePoint) -> {
|
|
if (codePoint > 127) {
|
|
numErrors++;
|
|
foundChar.add(codePoint);
|
|
String expected = convertChar.get(codePoint);
|
|
log.error(
|
|
"Invalid character. Codepoint: {}, Character: {}, Replace with: {}, File: {}",
|
|
codePoint, new String(Character.toChars(codePoint)), expected, fileName);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
Set<String> folders = Stream.of(path.toFile().listFiles())
|
|
.filter(file -> file.isDirectory())
|
|
.map(File::getName)
|
|
.collect(Collectors.toSet());
|
|
|
|
for (String aFolder : folders) {
|
|
searchFolder(folder + aFolder + "/");
|
|
}
|
|
|
|
}
|
|
|
|
@Test
|
|
public void testGenerateSearchIndex() throws IOException {
|
|
// See: https://www.w3schools.com/charsets/ref_utf_punctuation.asp
|
|
convertChar.put(169, "©");
|
|
convertChar.put(174, "®");
|
|
convertChar.put(176, "°");
|
|
convertChar.put(200, "È");
|
|
convertChar.put(201, "É");
|
|
convertChar.put(220, "Ü");
|
|
convertChar.put(223, "ß");
|
|
convertChar.put(224, "à");
|
|
convertChar.put(225, "á");
|
|
convertChar.put(226, "â");
|
|
convertChar.put(228, "ä");
|
|
convertChar.put(229, "å");
|
|
convertChar.put(230, "æ");
|
|
convertChar.put(231, "ç");
|
|
convertChar.put(232, "è");
|
|
convertChar.put(233, "é");
|
|
convertChar.put(234, "ê");
|
|
convertChar.put(237, "í");
|
|
convertChar.put(241, "ñ");
|
|
convertChar.put(244, "ô");
|
|
convertChar.put(246, "ö");
|
|
convertChar.put(248, "ø");
|
|
convertChar.put(252, "ü");
|
|
convertChar.put(253, "ý");
|
|
convertChar.put(268, "Č");
|
|
convertChar.put(283, "ě");
|
|
convertChar.put(339, "œ");
|
|
convertChar.put(345, "ř");
|
|
convertChar.put(352, "Š");
|
|
convertChar.put(381, "Ž");
|
|
convertChar.put(8209, "‑");
|
|
convertChar.put(8211, "–");
|
|
convertChar.put(8212, "—");
|
|
convertChar.put(8216, "‘");
|
|
convertChar.put(8217, "’");
|
|
convertChar.put(8220, "“");
|
|
convertChar.put(8221, "”");
|
|
convertChar.put(8226, "•");
|
|
convertChar.put(8230, "…");
|
|
convertChar.put(8250, "›");
|
|
convertChar.put(8482, "™");
|
|
convertChar.put(8594, "→");
|
|
convertChar.put(8629, "↵");
|
|
convertChar.put(8658, "⇒");
|
|
convertChar.put(9662, "▾");
|
|
convertChar.put(10004, "✔");
|
|
|
|
searchFolder("help/en/");
|
|
|
|
for (int codePoint : foundChar) {
|
|
String expected = convertChar.get(codePoint);
|
|
log.error("Found UTF-8 Codepoint: {}, Character: {}. Expected: {}",
|
|
codePoint, new String(Character.toChars(codePoint)), expected);
|
|
}
|
|
|
|
if (numErrors > 0) log.error("Num errors: {}", numErrors);
|
|
}
|
|
|
|
@BeforeEach
|
|
public void setUp() {
|
|
JUnitUtil.setUp();
|
|
}
|
|
|
|
@AfterEach
|
|
public void tearDown() {
|
|
JUnitUtil.tearDown();
|
|
}
|
|
|
|
private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(CheckHelpFilesForUTF8Test.class);
|
|
|
|
}
|