/* Copyright (C) 2016-2020 Nicola L.C. Talbot www.dickimaw-books.com This work may be distributed and/or modified under the conditions of the LaTeX Project Public License, either version 1.3 of this license or (at your option) any later version. The latest version of this license is in http://www.latex-project.org/lppl.txt and version 1.3 or later is part of all distributions of LaTeX version 2005/12/01 or later. */ package com.dickimawbooks.texosquery; import java.io.*; import java.util.Locale; import java.util.Calendar; import java.util.Date; import java.util.TimeZone; import java.util.Vector; import java.util.Arrays; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.text.DecimalFormatSymbols; import java.text.Format; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.text.DateFormatSymbols; import java.text.NumberFormat; import java.text.DecimalFormat; import java.nio.charset.*; /** * Application functions. These methods need to be Java version 1.5 * compatible. The 1.7 methods need to be in the TeXOSQueryJRE7 class * (which provides the main part of texosquery.jar) and the 1.8 methods in * TeXOSQueryJRE8 (which provides the main part of texosquery-jre8.jar). * * The texosquery-jre5.jar version should not be considered secure * and is only provided for antiquated systems. * Java 5 and 6 are both deprecated and are now considered security * risks. * * Since this application is designed to be run from TeX, the output * needs to be easy to parse using TeX commands. For this reason, * most exceptions are caught and an empty string is returned. The * TeX code can then check for an empty value to determine failure. * There's a debug mode to print error messages to STDERR to * investigate the reason for failure. * @author Nicola Talbot * @version 1.2.1 * @since 1.0 */ public class TeXOSQuery implements Serializable { /** * Constructor. * @param name The application name. */ public TeXOSQuery(String name) { this.name = name; } /** * Gets the application name. * @return the application name * @since 1.2 */ public String getName() { return name; } /** * Runs kpsewhich and returns the result. This is for single * argument lookups through kpsewhich, such as a file location * or variable value. * @param arg The argument to pass to kpsewhich * @return The result read from the first line of STDIN * @since 1.2 */ protected String kpsewhich(String arg) throws IOException,InterruptedException { // Create and start the process. Process process = new ProcessBuilder("kpsewhich", arg).start(); int exitCode = process.waitFor(); String line = null; if (exitCode == 0) { // kpsewhich completed with exit code 0. // Read STDIN to find the result. InputStream stream = process.getInputStream(); if (stream == null) { throw new IOException(String.format( "Unable to open input stream from process: kpsewhich '%s'", arg)); } BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(stream)); // only read a single line, nothing further is required // for a variable or file location query. line = reader.readLine(); } finally { if (reader != null) { reader.close(); } } } else { // kpsewhich failed. throw new IOException(String.format( "\"kpsewhich '%s'\" failed with exit code: %d", arg, exitCode)); } return line; } /** * Print message if in debug mode. Message is printed to STDERR * if the debug level is greater than or equal to the given level. * Debugging messages are all written to STDERR rather than * STDOUT so they show up in the transcript rather than being * captured by the shell escape. * @param message Debugging message. * @param level Debugging level. * @since 1.2 */ public void debug(String message, int level) { if (debugLevel >= level) { System.err.println(String.format("%s: %s", name, message)); } } /** * Print message if in debug mode. Message is printed to STDERR * if the debug level is 1 or more. * @param message Debugging message. * @since 1.2 */ public void debug(String message) { debug(message, DEBUG_ERROR_LEVEL); } /** * Message if in debug mode. This is for information rather than * errors. The message is printed to STDERR if the debug level * is 3 or more. * @param message Debugging message. * @since 1.2 */ public void info(String message) { debug(message, DEBUG_INFO_LEVEL); } /** * Print message and exception if in debug mode. Message is printed to * STDERR if the debug level is greater than or equal to the given level. * The exception may be null. If not null, the exception message * is printed. * @param message Debugging message. * @param excpt Exception. * @param msgLevel Debugging level for message. * @param traceLevel Debugging level for stack trace. * @since 1.2 */ public void debug(String message, Throwable excpt, int msgLevel, int traceLevel) { debug(message, msgLevel); if (excpt != null) { debug(excpt.getMessage(), msgLevel); if (debugLevel >= traceLevel) { excpt.printStackTrace(); } } } /** * Print message and exception if in debug mode. The message * level is 1 and the trace level is 2. * @param message Debugging message. * @param excpt Exception. * @since 1.2 */ public void debug(String message, Throwable excpt) { debug(message, excpt, DEBUG_ERROR_LEVEL, DEBUG_STACK_TRACE_LEVEL); } /** * Checks if file is in or below the given directory. This might * be easier with java.nio.file.Path etc but that requires Java * 1.7, so use the old-fashioned method. * @param file The file being checked * @param dir The directory being searched * @return true if found * @since 1.2 */ protected boolean isFileInTree(File file, File dir) throws IOException { if (file == null || dir == null) return false; file = file.getCanonicalFile(); dir = dir.getCanonicalFile(); File parent = file.getParentFile(); while (parent != null) { if (parent.equals(dir)) { return true; } parent = parent.getParentFile(); } return false; } /** * Determine if the given file is hidden. * Java's File.isHidden() method seems to consider "." and ".." * as hidden directories, so this method converts the file to a * canonical path before testing. * @param file The file to check * @return True if the file is considered hidden. * @since 1.2 */ public boolean isHidden(File file) { try { return file.getCanonicalFile().isHidden(); } catch (IOException e) { // file can't be converted to a canonical path, so // consider it hidden debug(String.format( "Unable to convert file to a canonical path: ", file.toString()), e); } return true; } /** * Fallback for openin_any if not found. * @since 1.2.2 */ public char openinFallbackValue() { return OPENIN_A; } /** * Queries if the given file may be read according to * openin_any. Since the user may not require any of the file * access functions, the openin variable is only set the first * time this method is used to reduce unnecessary overhead. * kpsewhich is used to lookup the value of openin_any, which * may have one of the following values: a (any), r (restricted, * no hidden files) or p (paranoid, as restricted and no parent * directories and no absolute paths except under $TEXMFOUTPUT). * Apparently with MikTeX, this variable isn't available, so we * need a fallback for that case. * @param file The file to be checked * @return true if read-access allowed * @since 1.2 */ public boolean isReadPermitted(File file) { // if file doesn't exist, it can't be read if (file == null || !file.exists()) { return false; } try { if (openin == OPENIN_UNSET) { //First time this method has been called. Use kpsewhich //to determine the value. try { String result = kpsewhich("-var-value=openin_any"); if ("a".equals(result)) { openin=OPENIN_A; } else if ("r".equals(result)) { openin=OPENIN_R; } else if ("p".equals(result)) { openin=OPENIN_P; } else { // openin_any variable hasn't been set, use the // fallback value. openin = openinFallbackValue(); debug(String.format( "Invalid openin_any value: %s%nUsing fallback value: %s", result, openin)); } } catch (Exception e) { // kpsewhich failed, assume paranoid debug("Can't determine openin value, assuming 'p'", e); openin = OPENIN_P; } // Now find TEXMFOUTPUT if set (only need this with the // paranoid setting) if (openin == OPENIN_P) { String path = null; try { path = System.getenv("TEXMFOUTPUT"); } catch (SecurityException e) { debug("Can't query TEXMFOUTPUT", e); } if (path != null && !"".equals(path)) { texmfoutput = new File(fromTeXPath(path)); if (!texmfoutput.exists()) { debug(String.format( "TEXMFOUTPUT doesn't exist, ignoring: %s", texmfoutput.toString())); texmfoutput = null; } else if (!texmfoutput.isDirectory()) { debug(String.format( "TEXMFOUTPUT isn't a directory, ignoring: %s", texmfoutput.toString())); texmfoutput = null; } else if (!texmfoutput.canRead()) { debug(String.format( "TEXMFOUTPUT doesn't have read permission, ignoring: %s", texmfoutput.toString())); texmfoutput = null; } } } } // Now check if the given file can be read according to the // openin setting. switch (openin) { case OPENIN_A: // any file can be read as long as the OS allows it return file.canRead(); case OPENIN_P: // paranoid check if (isFileInTree(file, texmfoutput)) { // file under TEXMFOUTPUT, so it's okay as long // as it has read permission return file.canRead(); } // does the file have an absolute path? if (file.isAbsolute()) { debug(String.format( "Read access forbidden by openin_any=%c (has absolute path outside TEXMFOUTPUT): %s", openin, file)); return false; } // is the file outside the cwd? File cwd = new File(getSystemProperty("user.dir", ".")); if (file.getParentFile() != null && !isFileInTree(file, cwd)) { debug(String.format( "Read access forbidden by openin_any=%c (outside cwd path): %s", openin, file)); return false; } // no break, fall through to restricted check case OPENIN_R: if (isHidden(file)) { // hidden file so not permitted debug(String.format( "Read access forbidden by openin_any=%c (hidden file): %s", openin, file)); return false; } break; default: // this shouldn't happen, but just in case... debug(String.format("Invalid openin value: %d", openin)); // don't allow, something's gone badly wrong return false; } // return read access return file.canRead(); } catch (Exception e) { // Catch all exceptions debug(String.format("Read permission check failed: %s", file), e); // Can't permit read if something's gone wrong here. return false; } } /** * Gets the given system property or the default value. * Returns the default value if the property isn't set or can't be accessed. * @param propName The property name * @param defValue The default value * @return The property value or the default if unavailable * @since 1.2 */ public String getSystemProperty(String propName, String defValue) { try { return System.getProperty(propName, defValue); } catch (SecurityException e) { // The security manager doesn't permit access to this property. debug(String.format("Unable to access property: %s", propName), e); return defValue; } } /** * Escapes potentially problematic characters from a string that will be * expanded when input by TeX's shell escape. * * Some of the methods in this class return TeX code. Those * returned values shouldn't be escaped as it would interfere * with the code, so just use this method on information * directly obtained from Java. This will typically be either * file names (in which case the characters within the string * must all be "letter" or "other") or regular text for use in * the document (such as dates or times, in which case the * characters may be active to allow them to be correctly * typeset, such as UTF-8 characters with inputenc.sty). * * The date-time and numeric patterns (such as "YYYY-MM-DD" * or "#,##0.0") are dealt with elsewhere as they need different treatment. * * \\TeXOSQuery locally defines commands for characters * used in file names (catcode 12). These are all in the form * \\fxxx (such as \\fhsh for a literal hash). Since the * texosquery.tex code is designed to be generic we can't assume * the eTeX \\detokenize primitive is available. This does, * however, assume that the document author hasn't changed the * category codes of the ASCII alphanumerics, but that ought to * be a safe assumption. * * We also have commands for characters intended for use in document * text, which shouldn't be interpreted literally. These are all * in the form \\txxx (such as \\thsh which should expand to * \#). * * The regular space \\tspc guards against a space occurring after * a character that needs to be converted to a control sequence. * (For example "# 1" becomes "\\thsh \\tspc 1") * There's also a literal space \\fspc to guard against spaces * in file names. * * This should take care of any insane file-naming schemes, such * as bad~file name#1.tex, stupid {file} name.tex, * spaced out file #2.tex, * file's stupid name.tex. * * To help protect against input encoding problems, non-ASCII * characters are wrapped in \\twrp (regular text) or \\fwrp * (file names). \\TeXOSQuery locally redefines these to * \\texosquerynonasciiwrap and \\texosquerynonasciidetokwrap * which may be used to provide some protection or conversion from one * encoding to another, if required. * * For example, the language "français" would be returned as * "fran\\twrp{ç}ais", which can be typeset directly with * XeTeX or LuaTeX or through active characters with * inputenc.sty, but the directory called Françcois would be * returned as Fran\\fwrp{ç}cois, which will try to * detokenize the ç character. * * @param string Input string. * @param isRegularText true if the string represents text (for example, * month names), set to false if string is something literal, * such as a file name. * @return The processed string * @since 1.2 */ public String escapeSpChars(String string, boolean isRegularText) { if (compatible < 2) { return escapeHash(string); } StringBuilder builder = new StringBuilder(); // This iterates over Unicode characters so we can't use a simple // i++ increment. The offset is obtained from Character.charCount for (int i = 0, n = string.length(); i < n; ) { int codepoint = string.codePointAt(i); i += Character.charCount(codepoint); builder.append(escapeSpChars(codepoint, isRegularText)); } return builder.toString(); } /** * Escapes file name. This should already have had the directory * divider changed to a forward slash where necessary. * @param filename Input string. * @return String with characters escaped. * @since 1.2 */ public String escapeFileName(String filename) { return escapeSpChars(filename, false); } /** * Escapes regular text. * @param string Input string. * @return String with characters escaped. * @since 1.2 */ public String escapeText(String string) { return escapeSpChars(string, true); } /** * Escapes regular text. * @param codepoint Input Unicode character. * @return String with characters escaped. * @since 1.2 */ public String escapeText(int codepoint) { return escapeSpChars(codepoint, true); } /** * Escapes the given Unicode character. * All ASCII punctuation characters have a literal and textual * command to represent them in file names and document text, * respectively. The literal (file name) commands are prefixed * with "f" and the textual commands are prefixed with "t". * None of the control codes should appear in any of the * results, but they are checked for completeness. * @param codePoint Input code point. * @param isRegularText true if the character is in a string representing * text, set to false if string is a file name etc * @return String with character escaped. * @since 1.2 */ public String escapeSpChars(int codepoint, boolean isRegularText) { return escapeSpChars(codepoint, isRegularText ? "t" : "f"); } /** * Escapes the given Unicode character. * As above but with the prefix supplied. * @param codePoint Input code point. * @param prefix The control sequence name prefix. * @return String with character escaped. * @since 1.2 */ public String escapeSpChars(int codepoint, String prefix) { switch (codepoint) { case '!': return String.format("\\%sexc ", prefix); case '"': return String.format("\\%sdqt ", prefix); case '#': return String.format("\\%shsh ", prefix); case '$': return String.format("\\%sdol ", prefix); case '%': return String.format("\\%spct ", prefix); case '&': return String.format("\\%samp ", prefix); case '\'': return String.format("\\%sapo ", prefix); case '(': return String.format("\\%sopb ", prefix); case ')': return String.format("\\%sclb ", prefix); case '*': return String.format("\\%sast ", prefix); case '+': return String.format("\\%spls ", prefix); case ',': return String.format("\\%scom ", prefix); case '-': return String.format("\\%shyn ", prefix); case '.': return String.format("\\%sdot ", prefix); case '/': return String.format("\\%sslh ", prefix); case ':': return String.format("\\%scln ", prefix); case ';': return String.format("\\%sscl ", prefix); case '<': return String.format("\\%sles ", prefix); case '=': return String.format("\\%seql ", prefix); case '>': return String.format("\\%sgre ", prefix); case '?': return String.format("\\%sque ", prefix); case '@': return String.format("\\%satc ", prefix); case '[': return String.format("\\%sosb ", prefix); case '\\': return String.format("\\%sbks ", prefix); case ']': return String.format("\\%scsb ", prefix); case '^': return String.format("\\%scir ", prefix); case '_': return String.format("\\%susc ", prefix); case '`': return String.format("\\%sgrv ", prefix); case '{': return String.format("\\%slbr ", prefix); case '}': return String.format("\\%srbr ", prefix); case '~': return String.format("\\%stld ", prefix); case ' ': return String.format("\\%sspc ", prefix); // These next few cases shouldn't occur, but // check for them anyway. case 0x007F: return ""; // delete control case 0x0009: return "^^I";// tab case 0x000A: // lf (fall through to cr) case 0x000C: // ff case 0x000D: return " "; // cr default: if (codepoint < 32) { return ""; // strip control characters } else if (codepoint >= 32 && codepoint <= 126) { // ASCII letters and digits (all ASCII punctuation // dealt with above). return String.format("%c", codepoint); } else { // Outside Basic Latin set. return String.format("\\%swrp{%c}", prefix, codepoint); } } } /** * Escapes any hashes in input string. * Now only used if compatibility level is less than 2 (pre * texosquery version 1.2). * @param string Input string. * @return String with hash escaped. */ public static String escapeHash(String string) { return string.replaceAll("#", "\\\\#"); } /** * Escapes hash from input character. * No longer required. * @param c Input character. * @return String with hash escaped. */ public static String escapeHash(char c) { return String.format("%s", c == '#' ? "\\#" : c); } /** * Gets the OS name. As far as I can tell, the "os.name" * property should return a string that just contains Basic * Latin upper or lower case letters, so we don't need to worry * about special characters. * @return The OS name as string. */ public String getOSname() { return getSystemProperty("os.name", ""); } /** * Gets the OS architecture. As with the OS name, this shouldn't * contain any special characters. * @return The OS architecture as string. */ public String getOSarch() { return getSystemProperty("os.arch", ""); } /** * Gets the OS version. This may contain an underscore, so treat * it like a file name. * @return The OS version as string. */ public String getOSversion() { return escapeFileName(getSystemProperty("os.version", "")); } /** * Converts the filename string to TeX path. Since this is designed to work * within TeX, backslashes in paths need to be replaced with forward * slashes. * @param filename The filename string. * @return TeX path. */ public String toTeXPath(String filename) { if (filename == null) { // This shouldn't happen, but just in case... try { // throw so we can get a stack trace for debugging throw new NullPointerException(); } catch (NullPointerException e) { debug("null file name", e); } return ""; } // If the OS uses backslash as the directory divider, // convert all backslashes to forward slashes. The Java regex // means that we need four backslashes to represent a single literal // backslash. if (File.separatorChar == BACKSLASH) { filename = filename.replaceAll("\\\\", "/"); } // Does a prefix need stripping? if (stripFilePrefix != null && filename.startsWith(stripFilePrefix)) { filename = filename.substring(stripFilePrefix.length()); } else if (pathRegExp != null && pathReplacement != null) { filename = filename.replaceFirst(pathRegExp, pathReplacement); } return escapeFileName(filename); } /** * Converts the TeX path to the OS representation. * The file name will typically be passed as a parameter through * \\TeXOSQuery so it will have forward slashes as the directory * divider regardless of the OS (as per \\input and * \\includegraphics). This method converts the TeX file name * into one that's valid for the OS. * @param filename The filename string. * @return The OS representation. */ public String fromTeXPath(String filename) { if (filename == null) { // This shouldn't happen, but just in case... try { throw new NullPointerException(); } catch (NullPointerException e) { debug("null file name", e); } return ""; } if (compatible < 2) { if (File.separatorChar == BACKSLASH) { return filename.replaceAll("/", "\\\\"); } return filename; } // The file name may contain awkward characters. For example, // the user may have a file called imagefile#1.png and // they're trying to do, say, // \TeXOSQuery{\result}{-p imagefile#1.png} // If the shell escape is using bash, the hash will be // interpreted as a comment character, so the argument // received by texosquery will actually be "imagefile" // since the "#1.png" part will be interpreted as a comment. // The user can protect the # from the shell using // \TeXOSQuery{\result}{-p imagefile\string\#1.png} // which bash will pass as 'imagefile#1.png', but // perhaps another type of shell might pass it literally // as 'imagefile\#1.png', so the following allows for // that by simply stripping all backslashes from the file name. // (The file name is always supplied with forward slashes as // the directory divider regardless of the operating system.) // We can substitute the divider at this point as well. StringBuilder builder = new StringBuilder(); for (int i = 0, n = filename.length(), offset=1; i < n; i+=offset) { int codepoint = filename.codePointAt(i); offset = Character.charCount(codepoint); int nextIndex = i+offset; int nextCodePoint = (nextIndex