diff --git a/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java b/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java index 27a653421312..c74e56bce890 100644 --- a/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java +++ b/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java @@ -10,7 +10,6 @@ import java.io.File; import java.io.IOException; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -18,6 +17,7 @@ import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; +import org.dspace.app.util.XMLUtils; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -49,8 +49,9 @@ private RegistryImporter() { } */ public static Document loadXML(String filename) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); + // This XML builder will *not* disable external entities as XML + // registries are considered trusted content + DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder(); Document document = builder.parse(new File(filename)); diff --git a/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java b/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java index bbf320a0d5e5..80ded7313b48 100644 --- a/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java +++ b/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java @@ -13,7 +13,6 @@ import java.util.ArrayList; import java.util.Arrays; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -22,6 +21,7 @@ import javax.xml.xpath.XPathFactory; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.BitstreamFormat; import org.dspace.content.factory.ContentServiceFactory; @@ -210,8 +210,9 @@ private static void loadFormat(Context context, Node node) */ private static Document loadXML(String filename) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); + // This XML builder will *not* disable external entities as XML + // registries are considered trusted content + DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder(); return builder.parse(new File(filename)); } diff --git a/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java b/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java index 13a1b3b5bbf8..c877e59b75d9 100644 --- a/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java +++ b/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java @@ -27,7 +27,6 @@ import java.util.List; import java.util.Map; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -43,6 +42,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -613,8 +613,8 @@ private static String validateCollections(NodeList collections, int level) */ private static org.w3c.dom.Document loadXML(InputStream input) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); + // This builder factory does not disable external DTD, entities, etc. + DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder(); org.w3c.dom.Document document = builder.parse(input); diff --git a/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java b/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java index a9cfce2fb493..20f165731aec 100644 --- a/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java @@ -30,6 +30,7 @@ import java.io.OutputStream; import java.io.PrintWriter; import java.net.URL; +import java.nio.file.Path; import java.sql.SQLException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -50,7 +51,6 @@ import java.util.zip.ZipFile; import javax.mail.MessagingException; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -70,6 +70,7 @@ import org.dspace.app.itemimport.service.ItemImportService; import org.dspace.app.util.LocalSchemaFilenameFilter; import org.dspace.app.util.RelationshipUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.ResourcePolicy; import org.dspace.authorize.service.AuthorizeService; @@ -189,6 +190,8 @@ public class ItemImportServiceImpl implements ItemImportService, InitializingBea @Autowired(required = true) protected ClarinLicenseResourceMappingService clarinLicenseResourceMappingService; + protected DocumentBuilder builder; + protected String tempWorkDir; protected boolean isTest = false; @@ -783,15 +786,22 @@ protected Item addItem(Context c, List mycollections, String path, myitem = wi.getItem(); } + // normalize and validate path to make sure itemname doesn't contain path traversal + Path itemPath = new File(path + File.separatorChar + itemname + File.separatorChar) + .toPath().normalize(); + if (!itemPath.startsWith(path)) { + throw new IOException("Illegal item metadata path: '" + itemPath); + } + // Normalization chops off the last separator, and we need to put it back + String itemPathDir = itemPath.toString() + File.separatorChar; + // now fill out dublin core for item - loadMetadata(c, myitem, path + File.separatorChar + itemname - + File.separatorChar); + loadMetadata(c, myitem, itemPathDir); // and the bitstreams from the contents file // process contents file, add bistreams and bundles, return any // non-standard permissions - List options = processContentsFile(c, myitem, path - + File.separatorChar + itemname, "contents"); + List options = processContentsFile(c, myitem, itemPathDir, "contents"); if (useWorkflow) { // don't process handle file @@ -809,8 +819,7 @@ protected Item addItem(Context c, List mycollections, String path, } } else { // only process handle file if not using workflow system - String myhandle = processHandleFile(c, myitem, path - + File.separatorChar + itemname, "handle"); + String myhandle = processHandleFile(c, myitem, itemPathDir, "handle"); // put item in system if (!isTest) { @@ -1037,6 +1046,34 @@ protected void addDCValue(Context c, Item i, String schema, Node n) } } + /** + * Ensures a file path does not attempt to access files outside the designated parent directory. + * + * @param parentDir The absolute path to the parent directory that should contain the file + * @param fileName The name or path of the file to validate + * @throws IOException If an error occurs while resolving canonical paths, or the file path attempts + * to access a location outside the parent directory + */ + private void validateFilePath(String parentDir, String fileName) throws IOException { + File parent = new File(parentDir); + File file = new File(fileName); + + // If the fileName is not an absolute path, we resolve it against the parentDir + if (!file.isAbsolute()) { + file = new File(parent, fileName); + } + + String parentCanonicalPath = parent.getCanonicalPath(); + String fileCanonicalPath = file.getCanonicalPath(); + + if (!fileCanonicalPath.startsWith(parentCanonicalPath)) { + log.error("File path outside of canonical root requested: fileCanonicalPath={} does not begin " + + "with parentCanonicalPath={}", fileCanonicalPath, parentCanonicalPath); + throw new IOException("Illegal file path '" + fileName + "' encountered. This references a path " + + "outside of the import package. Please see the system logs for more details."); + } + } + /** * Read the collections file inside the item directory. If there * is one and it is not empty return a list of collections in @@ -1237,6 +1274,7 @@ protected List processContentsFile(Context c, Item i, String path, sDescription = sDescription.replaceFirst("description:", ""); } + validateFilePath(path, sFilePath); registerBitstream(c, i, iAssetstore, sFilePath, sBundle, sDescription); logInfo("\tRegistering Bitstream: " + sFilePath + "\tAssetstore: " + iAssetstore @@ -1450,6 +1488,7 @@ protected void processContentFileEntry(Context c, Item i, String path, return; } + validateFilePath(path, fileName); String fullpath = path + File.separatorChar + fileName; // get an input stream @@ -1922,9 +1961,7 @@ protected String getStringValue(Node node) { */ protected Document loadXML(String filename) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); - + DocumentBuilder builder = XMLUtils.getDocumentBuilder(); return builder.parse(new File(filename)); } diff --git a/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java b/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java index 26de45caf77e..7dda65a0a75b 100644 --- a/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java +++ b/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java @@ -23,8 +23,6 @@ import java.util.Iterator; import java.util.List; import java.util.UUID; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; @@ -33,6 +31,7 @@ import org.apache.logging.log4j.Logger; import org.dspace.app.util.LocalSchemaFilenameFilter; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; @@ -52,7 +51,6 @@ public class ItemArchive { public static final String DUBLIN_CORE_XML = "dublin_core.xml"; - protected static DocumentBuilder builder = null; protected Transformer transformer = null; protected List dtomList = null; @@ -95,14 +93,14 @@ public static ItemArchive create(Context context, File dir, String itemField) InputStream is = null; try { is = new FileInputStream(new File(dir, DUBLIN_CORE_XML)); - itarch.dtomList = MetadataUtilities.loadDublinCore(getDocumentBuilder(), is); + itarch.dtomList = MetadataUtilities.loadDublinCore(XMLUtils.getDocumentBuilder(), is); //The code to search for local schema files was copied from org.dspace.app.itemimport // .ItemImportServiceImpl.java File file[] = dir.listFiles(new LocalSchemaFilenameFilter()); for (int i = 0; i < file.length; i++) { is = new FileInputStream(file[i]); - itarch.dtomList.addAll(MetadataUtilities.loadDublinCore(getDocumentBuilder(), is)); + itarch.dtomList.addAll(MetadataUtilities.loadDublinCore(XMLUtils.getDocumentBuilder(), is)); } } finally { if (is != null) { @@ -126,14 +124,6 @@ public static ItemArchive create(Context context, File dir, String itemField) return itarch; } - protected static DocumentBuilder getDocumentBuilder() - throws ParserConfigurationException { - if (builder == null) { - builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - } - return builder; - } - /** * Getter for Transformer * @@ -318,7 +308,7 @@ public void writeUndo(File undoDir) try { out = new FileOutputStream(new File(dir, "dublin_core.xml")); - Document doc = MetadataUtilities.writeDublinCore(getDocumentBuilder(), undoDtomList); + Document doc = MetadataUtilities.writeDublinCore(XMLUtils.getDocumentBuilder(), undoDtomList); MetadataUtilities.writeDocument(doc, getTransformer(), out); // if undo has delete bitstream diff --git a/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java b/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java index fcb2098bd066..7491e8f92237 100644 --- a/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java +++ b/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java @@ -19,6 +19,7 @@ import org.apache.commons.cli.ParseException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.core.Context; import org.dspace.scripts.DSpaceRunnable; import org.dspace.scripts.configuration.ScriptConfiguration; @@ -308,7 +309,7 @@ public static Document getConfig(DSpaceKernelImpl kernelImpl) { String config = kernelImpl.getConfigurationService().getProperty("dspace.dir") + System.getProperty("file.separator") + "config" + System.getProperty("file.separator") + "launcher.xml"; - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document doc = null; try { doc = saxBuilder.build(config); diff --git a/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java b/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java index 184f00a53e59..d3b447374a2c 100644 --- a/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java @@ -18,6 +18,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.dspace.app.sfx.service.SFXFileReaderService; +import org.dspace.app.util.XMLUtils; import org.dspace.content.DCPersonName; import org.dspace.content.Item; import org.dspace.content.MetadataValue; @@ -79,9 +80,9 @@ public Document parseFile(String fileName) { log.info("Parsing XML file... " + fileName); DocumentBuilder docBuilder; Document doc = null; - DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); - docBuilderFactory.setIgnoringElementContentWhitespace(true); try { + DocumentBuilderFactory docBuilderFactory = XMLUtils.getDocumentBuilderFactory(); + docBuilderFactory.setIgnoringElementContentWhitespace(true); docBuilder = docBuilderFactory.newDocumentBuilder(); } catch (ParserConfigurationException e) { log.error("Wrong parser configuration: " + e.getMessage()); diff --git a/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java b/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java index 4502312fa61a..a06dd4f90f7c 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java +++ b/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java @@ -16,7 +16,6 @@ import java.util.Map; import javax.servlet.ServletException; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.FactoryConfigurationError; import org.apache.commons.lang3.StringUtils; @@ -129,15 +128,17 @@ private void buildInputs(String fileName) valuePairs = new HashMap>(); complexDefinitions = new DCInput.ComplexDefinitions(valuePairs); - String uri = "file:" + new File(fileName).getAbsolutePath(); + File inputFile = new File(fileName); + String inputFileDir = inputFile.toPath().normalize().getParent().toString(); - try { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); + String uri = "file:" + inputFile.getAbsolutePath(); - DocumentBuilder db = factory.newDocumentBuilder(); + try { + // This document builder will *not* disable external + // entities as they can be useful in managing large forms, but + // it will restrict them to be within the directory that the + // current input form XML file exists (or a sub-directory) + DocumentBuilder db = XMLUtils.getTrustedDocumentBuilder(inputFileDir); Document doc = db.parse(uri); doNodes(doc); checkValues(); diff --git a/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java b/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java index 0a072a9819eb..5f58660c4335 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java +++ b/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java @@ -11,7 +11,6 @@ import java.io.IOException; import java.sql.SQLException; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.cli.CommandLine; @@ -105,8 +104,9 @@ private void run(String fileLocation) throws SQLException, AuthorizeException { private void parseXMLToRelations(Context context, String fileLocation) throws AuthorizeException { try { File fXmlFile = new File(fileLocation); - DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); + // This XML builder will allow external entities, so the relationship types XML should + // be considered trusted by administrators + DocumentBuilder dBuilder = XMLUtils.getTrustedDocumentBuilder(); Document doc = dBuilder.parse(fXmlFile); doc.getDocumentElement().normalize(); diff --git a/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java b/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java index 21208483583e..f691f038d724 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java +++ b/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java @@ -15,7 +15,6 @@ import java.util.List; import java.util.Map; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.FactoryConfigurationError; import org.apache.commons.lang3.StringUtils; @@ -140,13 +139,10 @@ private void buildInputs(String fileName) throws SubmissionConfigReaderException String uri = "file:" + new File(fileName).getAbsolutePath(); try { - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - - DocumentBuilder db = factory.newDocumentBuilder(); + // This document builder factory will *not* disable external + // entities as they can be useful in managing large forms, but + // it will restrict them to the config dir containing submission definitions + DocumentBuilder db = XMLUtils.getTrustedDocumentBuilder(configDir); Document doc = db.parse(uri); doNodes(doc); } catch (FactoryConfigurationError fe) { diff --git a/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java b/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java index c39d0d26fd5e..6b419a0485e8 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java +++ b/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java @@ -7,12 +7,26 @@ */ package org.dspace.app.util; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.stream.XMLInputFactory; import org.apache.commons.lang3.StringUtils; +import org.jdom2.input.SAXBuilder; import org.w3c.dom.Element; import org.w3c.dom.NodeList; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; /** * Simple class to read information from small XML using DOM manipulation @@ -161,4 +175,195 @@ public static List getElementValueArrayList(Element rootElement, } return result; } + + /** + * Initialize and return a javax DocumentBuilderFactory with NO security + * applied. This is intended only for internal, administrative/configuration + * use where external entities and other dangerous features are actually + * purposefully included. + * The method here is tiny, but may be expanded with other features like + * whitespace handling, and calling this method name helps to document + * the fact that the caller knows it is trusting the XML source / factory. + * + * @return document builder factory to generate new builders + * @throws ParserConfigurationException + */ + public static DocumentBuilderFactory getTrustedDocumentBuilderFactory() + throws ParserConfigurationException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + return factory; + } + + /** + * Initialize and return the javax DocumentBuilderFactory with some basic security + * applied to avoid XXE attacks and other unwanted content inclusion + * @return document builder factory to generate new builders + * @throws ParserConfigurationException + */ + public static DocumentBuilderFactory getDocumentBuilderFactory() + throws ParserConfigurationException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + // No DOCTYPE / DTDs + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + // No external general entities + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + // No external parameter entities + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + // No external DTDs + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + // Even if entities somehow get defined, they will not be expanded + factory.setExpandEntityReferences(false); + // Disable "XInclude" markup processing + factory.setXIncludeAware(false); + + return factory; + } + + /** + * Initialize and return a javax DocumentBuilder with less security + * applied. This is intended only for internal, administrative/configuration + * use where external entities and other dangerous features are actually + * purposefully included, but are only allowed from specified paths, e.g. + * dspace.dir or some other path specified by the java caller. + * The method here is tiny, but may be expanded with other features like + * whitespace handling, and calling this method name helps to document + * the fact that the caller knows it is trusting the XML source / builder + *

+ * If no allowedPaths are passed, then all external entities are rejected + * + * @return document builder with no security features set + * @throws ParserConfigurationException if the builder can not be configured + */ + public static DocumentBuilder getTrustedDocumentBuilder(String... allowedPaths) + throws ParserConfigurationException { + DocumentBuilderFactory factory = getTrustedDocumentBuilderFactory(); + factory.setValidating(false); + factory.setIgnoringComments(true); + factory.setIgnoringElementContentWhitespace(true); + DocumentBuilder builder = factory.newDocumentBuilder(); + builder.setEntityResolver(new PathRestrictedEntityResolver(allowedPaths)); + return factory.newDocumentBuilder(); + } + + /** + * Initialize and return the javax DocumentBuilder with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @return document builder for use in XML parsing + * @throws ParserConfigurationException if the builder can not be configured + */ + public static DocumentBuilder getDocumentBuilder() + throws ParserConfigurationException { + return getDocumentBuilderFactory().newDocumentBuilder(); + } + + /** + * Initialize and return the SAX document builder with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @return SAX document builder for use in XML parsing + */ + public static SAXBuilder getSAXBuilder() { + return getSAXBuilder(false); + } + + /** + * Initialize and return the SAX document builder with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @param validate whether to use JDOM XSD validation + * @return SAX document builder for use in XML parsing + */ + public static SAXBuilder getSAXBuilder(boolean validate) { + SAXBuilder saxBuilder = new SAXBuilder(); + if (validate) { + saxBuilder.setValidation(true); + } + // No DOCTYPE / DTDs + saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + // No external general entities + saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); + // No external parameter entities + saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + // No external DTDs + saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + // Don't expand entities + saxBuilder.setExpandEntities(false); + + return saxBuilder; + } + + /** + * Initialize and return the Java XML Input Factory with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @return XML input factory for use in XML parsing + */ + public static XMLInputFactory getXMLInputFactory() { + XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); + xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + + return xmlInputFactory; + } + + /** + * This entity resolver accepts one or more path strings in its + * constructor and throws a SAXException if the entity systemID + * is not within the allowed path (or a subdirectory). + * If no parameters are passed, then this effectively disallows + * any external entity resolution. + */ + public static class PathRestrictedEntityResolver implements EntityResolver { + private final List allowedBasePaths; + + public PathRestrictedEntityResolver(String... allowedBasePaths) { + this.allowedBasePaths = Arrays.asList(allowedBasePaths); + } + + @Override + public InputSource resolveEntity(String publicId, String systemId) + throws SAXException, IOException { + + if (systemId == null) { + return null; + } + + String filePath; + if (systemId.startsWith("file://")) { + filePath = systemId.substring(7); + } else if (systemId.startsWith("file:")) { + filePath = systemId.substring(5); + } else if (!systemId.contains("://")) { + filePath = systemId; + } else { + throw new SAXException("External resources not allowed: " + systemId + + ". Only local file paths are permitted."); + } + + Path resolvedPath; + try { + resolvedPath = Paths.get(filePath).toAbsolutePath().normalize(); + } catch (Exception e) { + throw new SAXException("Invalid path: " + systemId, e); + } + + boolean isAllowed = false; + for (String basePath : allowedBasePaths) { + Path allowedPath = Paths.get(basePath).toAbsolutePath().normalize(); + if (resolvedPath.startsWith(allowedPath)) { + isAllowed = true; + break; + } + } + + if (!isAllowed) { + throw new SAXException("Access denied to path: " + resolvedPath); + } + + File file = resolvedPath.toFile(); + if (!file.exists() || !file.canRead()) { + throw new SAXException("File not found or not readable: " + resolvedPath); + } + + return new InputSource(new FileInputStream(file)); + } + } + + } diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java index b8a4a8aef390..5ceacc933e4c 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java @@ -14,6 +14,7 @@ import java.util.List; import org.apache.commons.lang3.ArrayUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.packager.PackageDisseminator; @@ -129,7 +130,7 @@ public Element disseminateElement(Context context, DSpaceObject dso) try { //Return just the root Element of the METS file - SAXBuilder builder = new SAXBuilder(); + SAXBuilder builder = XMLUtils.getSAXBuilder(); Document metsDocument = builder.build(tempFile); return metsDocument.getRootElement(); } catch (JDOMException je) { diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java index 1e63be5ba1b9..205b3ef5b343 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java @@ -22,6 +22,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -144,7 +145,7 @@ public static String[] getPluginNames() { MODS_NS.getURI() + " " + MODS_XSD; private static final XMLOutputter outputUgly = new XMLOutputter(); - private static final SAXBuilder builder = new SAXBuilder(); + private static final SAXBuilder builder = XMLUtils.getSAXBuilder(); private Map modsMap = null; diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/OREIngestionCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/OREIngestionCrosswalk.java index f756aae22577..9e890a6046fa 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/OREIngestionCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/OREIngestionCrosswalk.java @@ -11,6 +11,8 @@ import java.io.IOException; import java.io.InputStream; import java.net.ConnectException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.sql.SQLException; import java.text.NumberFormat; @@ -18,6 +20,8 @@ import java.util.Date; import java.util.HashSet; import java.util.List; +import java.util.Locale; +import java.util.Objects; import java.util.Set; import org.apache.logging.log4j.Logger; @@ -34,6 +38,8 @@ import org.dspace.content.service.ItemService; import org.dspace.core.Constants; import org.dspace.core.Context; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; import org.jdom2.Attribute; import org.jdom2.Document; import org.jdom2.Element; @@ -76,6 +82,7 @@ public class OREIngestionCrosswalk .getBitstreamFormatService(); protected BundleService bundleService = ContentServiceFactory.getInstance().getBundleService(); protected ItemService itemService = ContentServiceFactory.getInstance().getItemService(); + protected ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); @Override @@ -173,9 +180,13 @@ public void ingest(Context context, DSpaceObject dso, Element root, boolean crea try { // Make sure the url string escapes all the oddball characters String processedURL = encodeForURL(href); - // Generate a requeset for the aggregated resource - ARurl = new URL(processedURL); - in = ARurl.openStream(); + if (validResourceUri(entryId, processedURL)) { + // Generate a request for the aggregated resource + ARurl = new URL(processedURL); + in = ARurl.openStream(); + } else { + throw new FileNotFoundException("Failed to validate " + processedURL); + } } catch (FileNotFoundException fe) { log.error("The provided URI failed to return a resource: " + href); } catch (ConnectException fe) { @@ -219,17 +230,17 @@ public void ingest(Context context, DSpaceObject dso, Element root, boolean crea * @param sourceString source unescaped string */ private String encodeForURL(String sourceString) { - Character lowalpha[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', + Character[] lowalpha = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; - Character upalpha[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', + Character[] upalpha = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}; - Character digit[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}; - Character mark[] = {'-', '_', '.', '!', '~', '*', '\'', '(', ')'}; + Character[] digit = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}; + Character[] mark = {'-', '_', '.', '!', '~', '*', '\'', '(', ')'}; // reserved - Character reserved[] = {';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '%', '#'}; + Character[] reserved = {';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '%', '#'}; Set URLcharsSet = new HashSet(); URLcharsSet.addAll(Arrays.asList(lowalpha)); @@ -251,4 +262,61 @@ private String encodeForURL(String sourceString) { return processedString.toString(); } + /** + * Validate a resource URI against the host and scheme of the remote OAI endpoint, or a configured + * list of allowed prefixes. + * This still implicitly "trusts" the remote OAI server, but will reject resource URIs with a totally + * different hostname to avoid downloading malicious resources from a compromised endpoint. + * Even if the URL prefix validation is disabled, schemes will still be enforced to http(s) so file:/// and + * other unwanted schemes cannot be used + * @param entryUrl the entryId of the parent ORE resource + * @param resourceUrl the resource URL of the aggregated ORE resource + * @return result of the validation + */ + private boolean validResourceUri(String entryUrl, String resourceUrl) { + try { + Set allowedSchemes = Set.of("http", "https"); + URI entryUri = new URI(entryUrl).normalize(); + URI resourceUri = new URI(resourceUrl).normalize(); + String scheme = resourceUri.getScheme(); + + if (scheme == null || + !allowedSchemes.contains(scheme.toLowerCase(Locale.ROOT))) { + log.warn("Illegal scheme requested for ORE resource: {}", resourceUri); + return false; + } + + if (configurationService.getBooleanProperty("oai.harvester.ore.file.validateUrlPrefix", false)) { + for (String allowedPrefix : configurationService + .getArrayProperty("oai.harvester.ore.file.allowedUrlPrefix")) { + URI allowedUri = new URI(allowedPrefix).normalize(); + // Return true on the first allowed prefix match + if (Objects.equals(resourceUri.getScheme(), allowedUri.getScheme()) + && Objects.equals(resourceUri.getHost().toLowerCase(Locale.ROOT), + allowedUri.getHost().toLowerCase(Locale.ROOT))) { + return true; + } + } + + // If no allowed prefixes were matched, we require scheme + host to match the remote OAI server + if (!Objects.equals(entryUri.getScheme(), resourceUri.getScheme())) { + log.warn("Illegal scheme requested for ORE resource: {}", resourceUri); + return false; + } + if (!Objects.equals( + entryUri.getHost().toLowerCase(Locale.ROOT), + resourceUri.getHost().toLowerCase(Locale.ROOT))) { + log.warn("Illegal host requested for ORE resource: {}", resourceUri); + return false; + } + } + + return true; + + } catch (URISyntaxException e) { + log.warn("Could not validate ORE resource URI: {}", resourceUrl); + return false; + } + } + } diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java index 2fdbaaad003e..51e6357d93e1 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java @@ -22,6 +22,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; @@ -125,7 +126,7 @@ public class QDCCrosswalk extends SelfNamedPlugin // XML schemaLocation fragment for this crosswalk, from config. private String schemaLocation = null; - private static final SAXBuilder builder = new SAXBuilder(); + private static final SAXBuilder builder = XMLUtils.getSAXBuilder(); protected ItemService itemService = ContentServiceFactory.getInstance().getItemService(); diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java index 2c763036ce33..8d5bf49902cc 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java @@ -13,6 +13,7 @@ import java.sql.SQLException; import java.util.List; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.packager.PackageDisseminator; @@ -208,7 +209,7 @@ public Element disseminateElement(Context context, DSpaceObject dso) try { //Try to parse our XML results (which were disseminated by the Packager) - SAXBuilder builder = new SAXBuilder(); + SAXBuilder builder = XMLUtils.getSAXBuilder(); Document xmlDocument = builder.build(tempFile); //If XML parsed successfully, return root element of doc if (xmlDocument != null && xmlDocument.hasRootElement()) { diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java index 63ef5f7336c7..b07b2b2228e4 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java @@ -18,6 +18,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -297,7 +298,7 @@ public static void main(String[] argv) throws Exception { "Failed to initialize transformer, probably error loading stylesheet."); } - SAXBuilder builder = new SAXBuilder(); + SAXBuilder builder = XMLUtils.getSAXBuilder(); Document inDoc = builder.build(new FileInputStream(argv[i + 1])); XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat()); List dimList; diff --git a/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java b/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java index 3399bdf0f07e..a1ed3c124374 100644 --- a/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java +++ b/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java @@ -20,6 +20,7 @@ import org.apache.commons.codec.binary.Base64; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.Bundle; @@ -265,12 +266,13 @@ protected METSManifest(SAXBuilder builder, Element mets, String configName) { public static METSManifest create(InputStream is, boolean validate, String configName) throws IOException, MetadataValidationException { - SAXBuilder builder = new SAXBuilder(validate); + SAXBuilder builder = XMLUtils.getSAXBuilder(); builder.setIgnoringElementContentWhitespace(true); // Set validation feature if (validate) { + builder.setValidation(true); builder.setFeature("http://apache.org/xml/features/validation/schema", true); // Tell the parser where local copies of schemas are, to speed up @@ -278,10 +280,6 @@ public static METSManifest create(InputStream is, boolean validate, String confi if (localSchemas.length() > 0) { builder.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation", localSchemas); } - } else { - // disallow DTD parsing to ensure no XXE attacks can occur. - // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); } // Parse the METS file diff --git a/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java b/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java index 2ce3f50a3cbc..5c4cf214445e 100644 --- a/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java +++ b/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java @@ -19,6 +19,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.codec.DecoderException; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -386,7 +387,7 @@ public void ingestStream(Context context, DSpaceObject parent, Document document; try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.getDocumentBuilderFactory(); dbf.setIgnoringComments(true); dbf.setCoalescing(true); DocumentBuilder db = dbf.newDocumentBuilder(); @@ -420,7 +421,7 @@ public DSpaceObject ingest(Context context, DSpaceObject parent, Document document; try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.getDocumentBuilderFactory(); dbf.setIgnoringComments(true); dbf.setCoalescing(true); DocumentBuilder db = dbf.newDocumentBuilder(); diff --git a/dspace-api/src/main/java/org/dspace/core/Email.java b/dspace-api/src/main/java/org/dspace/core/Email.java index 64da629bcc53..3d0edf36ebb0 100644 --- a/dspace-api/src/main/java/org/dspace/core/Email.java +++ b/dspace-api/src/main/java/org/dspace/core/Email.java @@ -21,9 +21,7 @@ import java.util.Collections; import java.util.Date; import java.util.Enumeration; -import java.util.Iterator; import java.util.List; -import java.util.Properties; import javax.activation.DataHandler; import javax.activation.DataSource; import javax.activation.FileDataSource; @@ -41,42 +39,53 @@ import javax.mail.internet.MimeMultipart; import javax.mail.internet.ParseException; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.velocity.Template; import org.apache.velocity.VelocityContext; -import org.apache.velocity.app.Velocity; import org.apache.velocity.app.VelocityEngine; import org.apache.velocity.exception.MethodInvocationException; import org.apache.velocity.exception.ParseErrorException; import org.apache.velocity.exception.ResourceNotFoundException; -import org.apache.velocity.runtime.resource.loader.StringResourceLoader; import org.apache.velocity.runtime.resource.util.StringResourceRepository; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; /** - * Class representing an e-mail message, also used to send e-mails. + * Class representing an e-mail message. The {@link send} method causes the + * assembled message to be formatted and sent. *

* Typical use: - *

+ *
+ * Email email = Email.getEmail(path);
+ * email.addRecipient("foo@bar.com");
+ * email.addArgument("John");
+ * email.addArgument("On the Testing of DSpace");
+ * email.send();
+ * 
+ * {@code path} is the filesystem path of an email template, typically in + * {@code ${dspace.dir}/config/emails/} and can include the subject -- see + * below. Templates are processed by + * Apache Velocity. They may contain VTL directives and property + * placeholders. + *

+ * {@link #addArgument(Object)} adds a property to the {@code params} array + * in the Velocity context, which can be used to replace placeholder tokens + * in the message. These arguments are indexed by number in the order they were + * added to the message. + *

+ * The DSpace configuration properties are also available to templates as the + * array {@code config}, indexed by name. Example: {@code ${config.get('dspace.name')}} *

- * Email email = new Email();
- * email.addRecipient("foo@bar.com");
- * email.addArgument("John");
- * email.addArgument("On the Testing of DSpace");
- * email.send();
- *

+ * Recipients and attachments may be added as needed. See {@link #addRecipient}, + * {@link #addAttachment(File, String)}, and + * {@link #addAttachment(InputStream, String, String)}. *

- * name is the name of an email template in - * dspace-dir/config/emails/ (which also includes the subject.) - * arg0 and arg1 are arguments to fill out the - * message with. - *

- * Emails are formatted using Apache Velocity. Headers such as Subject may be - * supplied by the template, by defining them using #set(). Example: - *

+ * Headers such as Subject may be supplied by the template, by defining them + * using the VTL directive {@code #set()}. Only headers named in the DSpace + * configuration array property {@code mail.message.headers} will be added. + *

+ * Example: * *

  *
@@ -91,12 +100,14 @@
  *
  *     Thank you for sending us your submission "${params[1]}".
  *
+ *     --
+ *     The ${config.get('dspace.name')} Team
+ *
  * 
* *

* If the example code above was used to send this mail, the resulting mail * would have the subject Example e-mail and the body would be: - *

* *
  *
@@ -105,7 +116,16 @@
  *
  *     Thank you for sending us your submission "On the Testing of DSpace".
  *
+ *     --
+ *     The DSpace Team
+ *
  * 
+ *

+ * There are two ways to load a message body. One can create an instance of + * {@link Email} and call {@link #setContent} on it, passing the body as a String. Or + * one can use the static factory method {@link #getEmail} to load a file by its + * complete filesystem path. In either case the text will be loaded into a + * Velocity template. * * @author Robert Tansley * @author Jim Downing - added attachment handling code @@ -115,7 +135,6 @@ public class Email { /** * The content of the message */ - private String content; private String contentName; /** @@ -150,18 +169,6 @@ public class Email { /** Velocity template settings. */ private static final String RESOURCE_REPOSITORY_NAME = "Email"; - private static final Properties VELOCITY_PROPERTIES = new Properties(); - static { - VELOCITY_PROPERTIES.put(Velocity.RESOURCE_LOADERS, "string"); - VELOCITY_PROPERTIES.put("resource.loader.string.description", - "Velocity StringResource loader"); - VELOCITY_PROPERTIES.put("resource.loader.string.class", - StringResourceLoader.class.getName()); - VELOCITY_PROPERTIES.put("resource.loader.string.repository.name", - RESOURCE_REPOSITORY_NAME); - VELOCITY_PROPERTIES.put("resource.loader.string.repository.static", - "false"); - } /** Velocity template for a message body */ private Template template; @@ -176,13 +183,19 @@ public Email() { moreAttachments = new ArrayList<>(10); subject = ""; template = null; - content = ""; replyTo = null; charset = null; } /** - * Add a recipient + * Get configuration service + */ + private static ConfigurationService getConfigurationService() { + return DSpaceServicesFactory.getInstance().getConfigurationService(); + } + + /** + * Add a recipient. * * @param email the recipient's email address */ @@ -196,16 +209,24 @@ public void addRecipient(String email) { * "Subject:" line must be stripped. * * @param name a name for this message body - * @param cnt the content of the message + * @param content the content of the message */ - public void setContent(String name, String cnt) { - content = cnt; + public void setContent(String name, String content) { contentName = name; arguments.clear(); + + VelocityEngine templateEngine = new VelocityEngine(); + templateEngine.init(Utils.getSecureVelocityProperties(RESOURCE_REPOSITORY_NAME)); + + StringResourceRepository repo = (StringResourceRepository) + templateEngine.getApplicationAttribute(RESOURCE_REPOSITORY_NAME); + repo.putStringResource(contentName, content); + // Turn content into a template. + template = templateEngine.getTemplate(contentName); } /** - * Set the subject of the message + * Set the subject of the message. * * @param s the subject of the message */ @@ -214,7 +235,7 @@ public void setSubject(String s) { } /** - * Set the reply-to email address + * Set the reply-to email address. * * @param email the reply-to email address */ @@ -223,14 +244,22 @@ public void setReplyTo(String email) { } /** - * Fill out the next argument in the template + * Fill out the next argument in the template. * - * @param arg the value for the next argument + * @param arg the value for the next argument. If {@code null}, + * a zero-length string is substituted. */ public void addArgument(Object arg) { arguments.add(arg); } + /** + * Add an attachment bodypart to the message from an external file. + * + * @param f reference to a file to be attached. + * @param name a name for the resulting bodypart in the message's MIME + * structure. + */ public void addAttachment(File f, String name) { attachments.add(new FileAttachment(f, name)); } @@ -238,6 +267,17 @@ public void addAttachment(File f, String name) { /** When given a bad MIME type for an attachment, use this instead. */ private static final String DEFAULT_ATTACHMENT_TYPE = "application/octet-stream"; + /** + * Add an attachment bodypart to the message from a byte stream. + * + * @param is the content of this stream will become the content of the + * bodypart. + * @param name a name for the resulting bodypart in the message's MIME + * structure. + * @param mimetype the MIME type of the resulting bodypart, such as + * "text/pdf". If {@code null} it will default to + * "application/octet-stream", which is MIME for "unknown format". + */ public void addAttachment(InputStream is, String name, String mimetype) { if (null == mimetype) { LOG.error("Null MIME type replaced with '" + DEFAULT_ATTACHMENT_TYPE @@ -257,6 +297,11 @@ public void addAttachment(InputStream is, String name, String mimetype) { moreAttachments.add(new InputStreamAttachment(is, name, mimetype)); } + /** + * Set the character set of the message. + * + * @param cs the name of a character set, such as "UTF-8" or "EUC-JP". + */ public void setCharset(String cs) { charset = cs; } @@ -280,25 +325,27 @@ public void reset() { * {@code mail.message.headers} then that name and its value will be added * to the message's headers. * - *

"subject" is treated specially: if {@link setSubject()} has not been called, - * the value of any "subject" property will be used as if setSubject had - * been called with that value. Thus a template may define its subject, but - * the caller may override it. + *

"subject" is treated specially: if {@link #setSubject} has not been + * called, the value of any "subject" property will be used as if setSubject + * had been called with that value. Thus a template may define its subject, + * but the caller may override it. * * @throws MessagingException if there was a problem sending the mail. * @throws IOException if IO error */ public void send() throws MessagingException, IOException { - ConfigurationService config - = DSpaceServicesFactory.getInstance().getConfigurationService(); + if (null == template) { + // No template -- no content -- PANIC!!! + throw new MessagingException("Email has no body"); + } // Get the mail configuration properties - String from = config.getProperty("mail.from.address"); - boolean disabled = config.getBooleanProperty("mail.server.disabled", false); + String from = getConfigurationService().getProperty("mail.from.address"); + boolean disabled = getConfigurationService().getBooleanProperty("mail.server.disabled", false); // If no character set specified, attempt to retrieve a default if (charset == null) { - charset = config.getProperty("mail.charset"); + charset = getConfigurationService().getProperty("mail.charset"); } // Get session @@ -308,34 +355,20 @@ public void send() throws MessagingException, IOException { MimeMessage message = new MimeMessage(session); // Set the recipients of the message - Iterator i = recipients.iterator(); - - while (i.hasNext()) { - message.addRecipient(Message.RecipientType.TO, new InternetAddress( - i.next())); + for (String recipient : recipients) { + message.addRecipient(Message.RecipientType.TO, + new InternetAddress(recipient)); } + // Get headers defined by the template. + String[] templateHeaders = getConfigurationService().getArrayProperty("mail.message.headers"); // Format the mail message body - VelocityEngine templateEngine = new VelocityEngine(); - templateEngine.init(VELOCITY_PROPERTIES); - VelocityContext vctx = new VelocityContext(); - vctx.put("config", new UnmodifiableConfigurationService(config)); + // Pass a restricted (via configuration) list of resolved Configuration keys and values, for + // template lookup + vctx.put("config", Utils.getAllowedTemplateConfig()); vctx.put("params", Collections.unmodifiableList(arguments)); - if (null == template) { - if (StringUtils.isBlank(content)) { - // No template and no content -- PANIC!!! - throw new MessagingException("Email has no body"); - } - // No template, so use a String of content. - StringResourceRepository repo = (StringResourceRepository) - templateEngine.getApplicationAttribute(RESOURCE_REPOSITORY_NAME); - repo.putStringResource(contentName, content); - // Turn content into a template. - template = templateEngine.getTemplate(contentName); - } - StringWriter writer = new StringWriter(); try { template.merge(vctx, writer); @@ -351,8 +384,7 @@ public void send() throws MessagingException, IOException { message.setSentDate(date); message.setFrom(new InternetAddress(from)); - // Get headers defined by the template. - for (String headerName : config.getArrayProperty("mail.message.headers")) { + for (String headerName : templateHeaders) { String headerValue = (String) vctx.get(headerName); if ("subject".equalsIgnoreCase(headerName)) { if (null != headerValue) { @@ -403,7 +435,8 @@ public void send() throws MessagingException, IOException { // add the stream messageBodyPart = new MimeBodyPart(); messageBodyPart.setDataHandler(new DataHandler( - new InputStreamDataSource(attachment.name,attachment.mimetype,attachment.is))); + new InputStreamDataSource(attachment.name, + attachment.mimetype, attachment.is))); messageBodyPart.setFileName(attachment.name); multipart.addBodyPart(messageBodyPart); } @@ -445,6 +478,9 @@ public void send() throws MessagingException, IOException { /** * Get the VTL template for an email message. The message is suitable * for inserting values using Apache Velocity. + *

+ * Note that everything is stored here, so that only send() throws a + * MessagingException. * * @param emailFile * full name for the email template, for example "/dspace/config/emails/register". @@ -482,15 +518,6 @@ public static Email getEmail(String emailFile) } return email; } - /* - * Implementation note: It might be necessary to add a quick utility method - * like "send(to, subject, message)". We'll see how far we get without it - - * having all emails as templates in the config allows customisation and - * internationalisation. - * - * Note that everything is stored and the run in send() so that only send() - * throws a MessagingException. - */ /** * Test method to send an email to check email server settings @@ -545,7 +572,7 @@ public static void main(String[] args) { } /** - * Utility struct class for handling file attachments. + * Utility record class for handling file attachments. * * @author ojd20 */ @@ -561,7 +588,7 @@ public FileAttachment(File f, String n) { } /** - * Utility struct class for handling file attachments. + * Utility record class for handling file attachments. * * @author Adán Román Ruiz at arvo.es */ @@ -578,6 +605,8 @@ public InputStreamAttachment(InputStream is, String name, String mimetype) { } /** + * Wrap an {@link InputStream} in a {@link DataSource}. + * * @author arnaldo */ public static class InputStreamDataSource implements DataSource { @@ -585,6 +614,14 @@ public static class InputStreamDataSource implements DataSource { private final String contentType; private final ByteArrayOutputStream baos; + /** + * Consume the content of an InputStream and store it in a local buffer. + * + * @param name give the DataSource a name. + * @param contentType the DataSource contains this type of data. + * @param inputStream content to be buffered in the DataSource. + * @throws IOException if the stream cannot be read. + */ InputStreamDataSource(String name, String contentType, InputStream inputStream) throws IOException { this.name = name; this.contentType = contentType; @@ -616,31 +653,4 @@ public OutputStream getOutputStream() throws IOException { throw new IOException("Cannot write to this read-only resource"); } } - - /** - * Wrap ConfigurationService to prevent templates from modifying - * the configuration. - */ - public static class UnmodifiableConfigurationService { - private final ConfigurationService configurationService; - - /** - * Swallow an instance of ConfigurationService. - * - * @param cs the real instance, to be wrapped. - */ - public UnmodifiableConfigurationService(ConfigurationService cs) { - configurationService = cs; - } - - /** - * Look up a key in the actual ConfigurationService. - * - * @param key to be looked up in the DSpace configuration. - * @return whatever value ConfigurationService associates with {@code key}. - */ - public String get(String key) { - return configurationService.getProperty(key); - } - } } diff --git a/dspace-api/src/main/java/org/dspace/core/Utils.java b/dspace-api/src/main/java/org/dspace/core/Utils.java index b9fff20c7674..68d9a56cb61a 100644 --- a/dspace-api/src/main/java/org/dspace/core/Utils.java +++ b/dspace-api/src/main/java/org/dspace/core/Utils.java @@ -31,16 +31,23 @@ import java.util.Collections; import java.util.Date; import java.util.GregorianCalendar; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Properties; import java.util.Random; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import com.coverity.security.Escape; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.StringSubstitutor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.velocity.app.Velocity; +import org.apache.velocity.runtime.resource.loader.StringResourceLoader; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; @@ -103,6 +110,11 @@ public final class Utils { private static final Calendar outCal = GregorianCalendar.getInstance(); + // Allowed configuration properties to pass to Velocity templates (Email, LDN) + private static final String[] DEFAULT_ALLOWED_TEMPLATE_CONFIGS = { + "dspace.name", "dspace.shortname", "dspace.ui.url", + "mail.helpdesk", "mail.message.helpdesk.telephone", "mail.admin", "mail.admin.name"}; + /** * Private constructor */ @@ -506,4 +518,64 @@ public static String interpolateConfigsInString(String string) { ConfigurationService config = DSpaceServicesFactory.getInstance().getConfigurationService(); return StringSubstitutor.replace(string, config.getProperties()); } + + /** + * Get a list of allowed DSpace configuration property keys that will be exposed to Velocity templates + * (used in Email and LDN messages) as a simple Map of strings. + * @return Map of strings representing resolved configuration properties + */ + public static Map getAllowedTemplateConfig() { + // Pass a restricted (via configuration) list of resolved Configuration keys and values, for + // template lookup + ConfigurationService configurationService = + DSpaceServicesFactory.getInstance().getConfigurationService(); + List allowedConfigurationKeys = List.of(configurationService.getArrayProperty( + "message.templates.allowed-config", DEFAULT_ALLOWED_TEMPLATE_CONFIGS)); + return allowedConfigurationKeys.stream() + .map(key -> { + String value = configurationService.getProperty(key); + return value != null ? Map.entry(key, value) : null; + }) + .filter(Objects::nonNull) + .collect(Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue + )); + } + + /** + * Create and return a set of default, secure Velocity configuration properties. + * @see {@link Email} + * + * @param resourceRepositoryName the templating context e.g. "LDN", "Email" + * @returns secure Velocity configuration for use with templating + */ + public static Properties getSecureVelocityProperties(String resourceRepositoryName) { + Properties secureVelocityProperties = new Properties(); + // Basic Velocity configuration + secureVelocityProperties.setProperty(Velocity.RESOURCE_LOADERS, "string"); + secureVelocityProperties.setProperty("resource.loader.string.description", + "Velocity StringResource loader"); + secureVelocityProperties.setProperty("resource.loader.string.class", + StringResourceLoader.class.getName()); + secureVelocityProperties.setProperty("resource.loader.string.repository.name", + resourceRepositoryName); + secureVelocityProperties.setProperty("resource.loader.string.repository.static", + "false"); + // Set secure default introspection and class restriction handling in Velocity + secureVelocityProperties.setProperty("introspector.uberspect.class", + "org.apache.velocity.util.introspection.SecureUberspector"); + secureVelocityProperties.setProperty("introspector.restrict.classes", + "java.lang.Class,java.lang.Runtime,java.lang.System"); + secureVelocityProperties.setProperty( "introspector.restrict.packages", + "java.lang.reflect,java.io,java.nio"); + // Set strict mode if configured (default: false, as we've always treated null values as blanks) + if (DSpaceServicesFactory.getInstance().getConfigurationService() + .getBooleanProperty("message.templates.strict_mode", false)) { + secureVelocityProperties.setProperty("runtime.strict_mode.enable", "true"); + } + + return secureVelocityProperties; + } + } diff --git a/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java b/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java index f7ab18c01e54..32d897dd17db 100644 --- a/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java +++ b/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java @@ -37,6 +37,7 @@ import org.apache.http.impl.client.HttpClientBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; @@ -176,7 +177,7 @@ public void init(Curator curator, String taskId) throws IOException { fieldSeparator = (fldSep != null) ? fldSep : " "; urlTemplate = taskProperty("template"); templateParam = urlTemplate.substring(urlTemplate.indexOf("{") + 1, - urlTemplate.indexOf("}")); + urlTemplate.indexOf("}")); String[] parsed = parseTransform(templateParam); lookupField = parsed[0]; lookupTransform = parsed[1]; @@ -204,12 +205,9 @@ public void init(Curator curator, String taskId) throws IOException { } } // initialize response document parser - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); try { - // disallow DTD parsing to ensure no XXE attacks can occur. - // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + DocumentBuilderFactory factory = XMLUtils.getDocumentBuilderFactory(); + factory.setNamespaceAware(true); docBuilder = factory.newDocumentBuilder(); } catch (ParserConfigurationException pcE) { log.error("caught exception: " + pcE); diff --git a/dspace-api/src/main/java/org/dspace/curate/Curation.java b/dspace-api/src/main/java/org/dspace/curate/Curation.java index b3af072a32cd..260e911c8f05 100644 --- a/dspace-api/src/main/java/org/dspace/curate/Curation.java +++ b/dspace-api/src/main/java/org/dspace/curate/Curation.java @@ -10,15 +10,18 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintStream; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.UUID; @@ -35,6 +38,8 @@ import org.dspace.handle.factory.HandleServiceFactory; import org.dspace.handle.service.HandleService; import org.dspace.scripts.DSpaceRunnable; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.storage.secure.SecureFileAccess; import org.dspace.utils.DSpace; /** @@ -107,8 +112,16 @@ private void handleCurationTask(Curator curator) throws IOException, SQLExceptio } else if (commandLine.hasOption('T')) { // load taskFile BufferedReader reader = null; + // in this case, Curation CLI expects to calculate the -T parameter from the user's current working dir + String taskFilePath = SecureFileAccess.calculateAbsolutePathUsingCwd(this.taskFile); try { - reader = new BufferedReader(new FileReader(this.taskFile)); + String dspaceDir = DSpaceServicesFactory.getInstance() + .getConfigurationService().getProperty("dspace.dir"); + List allowedTaskFileBasePath = new ArrayList<>( + Arrays.asList(DSpaceServicesFactory.getInstance().getConfigurationService() + .getArrayProperty("curate.taskfile.base", new String[]{dspaceDir}))); + reader = SecureFileAccess.getBufferedReader(taskFilePath, allowedTaskFileBasePath, + "curation-taskfile", StandardCharsets.UTF_8); while ((taskName = reader.readLine()) != null) { if (verbose) { super.handler.logInfo("Adding task: " + taskName); @@ -191,12 +204,25 @@ private void endScript(long timeRun) throws SQLException { private Curator initCurator() throws FileNotFoundException { Curator curator = new Curator(handler); OutputStream reporterStream; + String dspaceDir = DSpaceServicesFactory.getInstance() + .getConfigurationService().getProperty("dspace.dir"); + List allowedReporterBasePaths = new ArrayList<>(Arrays.asList(DSpaceServicesFactory.getInstance() + .getConfigurationService().getArrayProperty("curate.reporter.base", + new String[]{dspaceDir + File.separatorChar + "log"}))); if (null == this.reporter) { reporterStream = new NullOutputStream(); } else if ("-".equals(this.reporter)) { reporterStream = System.out; } else { - reporterStream = new PrintStream(this.reporter); + // Reporter param comes from CLI execution. Calculate abs path from user's current working dir + String reporterFilePath = SecureFileAccess.calculateAbsolutePathUsingCwd(this.reporter); + try { + reporterStream = new PrintStream( + SecureFileAccess.getOutputStream( + reporterFilePath, allowedReporterBasePaths, "curation-reporter")); + } catch (IOException e) { + throw new FileNotFoundException(e.getLocalizedMessage()); + } } Writer reportWriter = new OutputStreamWriter(reporterStream); curator.setReporter(reportWriter); diff --git a/dspace-api/src/main/java/org/dspace/curate/CurationCliScriptConfiguration.java b/dspace-api/src/main/java/org/dspace/curate/CurationCliScriptConfiguration.java index eaa04f477829..925bd4f2d232 100644 --- a/dspace-api/src/main/java/org/dspace/curate/CurationCliScriptConfiguration.java +++ b/dspace-api/src/main/java/org/dspace/curate/CurationCliScriptConfiguration.java @@ -20,6 +20,10 @@ public Options getOptions() { options = super.getOptions(); options.addOption("e", "eperson", true, "email address of curating eperson"); options.getOption("e").setRequired(true); + options.addOption("r", "reporter", true, + "relative or absolute path to the desired report file. Use '-' to report to console. If absent, no " + + "reporting"); + options.addOption("T", "taskfile", true, "file containing curation task names"); return options; } } diff --git a/dspace-api/src/main/java/org/dspace/curate/CurationClientOptions.java b/dspace-api/src/main/java/org/dspace/curate/CurationClientOptions.java index 8ec0f14697c0..03ad2f34b230 100644 --- a/dspace-api/src/main/java/org/dspace/curate/CurationClientOptions.java +++ b/dspace-api/src/main/java/org/dspace/curate/CurationClientOptions.java @@ -31,7 +31,8 @@ public enum CurationClientOptions { /** * This method resolves the CommandLine parameters to figure out which action the curation script should perform * - * @param commandLine The relevant CommandLine for the curation script + * @param commandLine The relevant CommandLine for the curation script. Note that -T is passed only + * from CurationCliScriptConfig and is not accessible from UI processes * @return The curation option to be ran, parsed from the CommandLine */ protected static CurationClientOptions getClientOption(CommandLine commandLine) { @@ -54,14 +55,10 @@ protected static Options constructOptions() { Options options = new Options(); options.addOption("t", "task", true, "curation task name; options: " + getTaskOptions()); - options.addOption("T", "taskfile", true, "file containing curation task names"); options.addOption("i", "id", true, "Id (handle) of object to perform task on, or 'all' to perform on whole repository"); options.addOption("p", "parameter", true, "a task parameter 'NAME=VALUE'"); options.addOption("q", "queue", true, "name of task queue to process"); - options.addOption("r", "reporter", true, - "relative or absolute path to the desired report file. Use '-' to report to console. If absent, no " + - "reporting"); options.addOption("s", "scope", true, "transaction scope to impose: use 'object', 'curation', or 'open'. If absent, 'open' applies"); options.addOption("v", "verbose", false, "report activity to stdout"); diff --git a/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java b/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java index 8f48cda712bc..0e156f7ab5f3 100644 --- a/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java +++ b/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java @@ -12,7 +12,11 @@ import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import org.dspace.app.util.XMLUtils; import org.xml.sax.SAXException; /** @@ -28,11 +32,14 @@ public abstract class Converter { protected Object unmarshall(InputStream input, Class type) throws SAXException, URISyntaxException { try { + XMLInputFactory xmlInputFactory = XMLUtils.getXMLInputFactory(); + XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(input); + JAXBContext context = JAXBContext.newInstance(type); Unmarshaller unmarshaller = context.createUnmarshaller(); - return unmarshaller.unmarshal(input); - } catch (JAXBException e) { - throw new RuntimeException("Unable to unmarshall orcid message" + e); + return unmarshaller.unmarshal(xmlStreamReader); + } catch (JAXBException | XMLStreamException e) { + throw new RuntimeException("Unable to unmarshall orcid message: " + e); } } } diff --git a/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java b/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java index 57136d6143bb..6da6ce1db8bf 100644 --- a/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java +++ b/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java @@ -33,6 +33,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.util.EntityUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.crosswalk.CrosswalkException; @@ -803,7 +804,7 @@ protected String extractAlternateIdentifier(Context context, String content) } // parse the XML - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document doc = null; try { doc = saxBuilder.build(new ByteArrayInputStream(content.getBytes("UTF-8"))); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 96689e62ba75..4ee8a165e6de 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -23,6 +23,7 @@ import javax.ws.rs.core.Response; import org.apache.commons.lang3.StringUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -219,7 +220,7 @@ public Integer call() throws Exception { if (response.getStatus() == 200) { String responseString = response.readEntity(String.class); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(responseString)); Element root = document.getRootElement(); @@ -400,7 +401,7 @@ private String getQuery(Query query) { private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java index 5eff46c790e4..c0fa4ea22027 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java @@ -27,6 +27,7 @@ import org.apache.http.client.utils.URIBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -302,7 +303,7 @@ protected List search(String id, String appId) private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); return root.getChildren(); @@ -355,7 +356,7 @@ private List getCiniiIds(String appId, Integer maxResult, String author, Map> params = new HashMap>(); String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); int url_len = this.url.length() - 1; - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); List namespaces = Arrays.asList( @@ -417,7 +418,7 @@ private Integer countCiniiElement(String appId, Integer maxResult, String author Map> params = new HashMap>(); String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); List namespaces = Arrays @@ -444,4 +445,4 @@ private MetadatumDTO createIdentifier(String id) { return metadatumDTO; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java index 7240e356e371..552f607827a8 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java @@ -32,6 +32,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.xerces.impl.dv.util.Base64; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -397,7 +398,11 @@ private Integer countDocument(String bearer, String query) { String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); + // To properly parse EPO responses, we must allow DOCTYPEs overall. But, we can still apply all the + // other default XXE protections, including disabling external entities and entity expansion. + // NOTE: we only need to allow DOCTYPEs for this initial API call. All other calls have them disabled. + saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); @@ -434,7 +439,7 @@ private List searchDocumentIds(String bearer, String query, int s String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); @@ -485,7 +490,7 @@ private List searchDocument(String bearer, String id, String docTy private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); List namespaces = Arrays.asList(Namespace.getNamespace("ns", "http://www.epo.org/exchange")); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java index b30ea22ca4e4..b5baf175a967 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java @@ -24,6 +24,7 @@ import com.google.common.io.CharStreams; import org.apache.commons.lang3.StringUtils; import org.apache.http.client.utils.URIBuilder; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -233,7 +234,10 @@ private String getSingleElementValue(String src, String elementName) { String value = null; try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); + // To properly parse PubMed responses, we must allow DOCTYPEs overall. But, we can still apply all the + // other default XXE protections, including disabling external entities and entity expansion. + saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false); Document document = saxBuilder.build(new StringReader(src)); Element root = document.getRootElement(); @@ -337,7 +341,10 @@ public Collection call() throws Exception { private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); + // To properly parse PubMed responses, we must allow DOCTYPEs overall. But, we can still apply all the + // other default XXE protections, including disabling external entities and entity expansion. + saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java index 1ec0da74206e..02d6358c7c73 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java @@ -25,6 +25,7 @@ import org.apache.http.client.utils.URIBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -293,7 +294,7 @@ public Integer count(String query) throws URISyntaxException, ClientProtocolExce Map> params = new HashMap>(); String response = liveImportClient.executeHttpGetRequest(1000, buildURI(1, query), params); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); Element element = root.getChild("hitCount"); @@ -364,7 +365,7 @@ public List search(String query, Integer size, Integer start) thro String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); String cursorMark = StringUtils.EMPTY; if (StringUtils.isNotBlank(response)) { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); XPathFactory xpfac = XPathFactory.instance(); XPathExpression xPath = xpfac.compile("//responseWrapper/resultList/result", @@ -416,4 +417,4 @@ public void setUrl(String url) { this.url = url; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java index d0c2fb078a2c..59d790872ba4 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java @@ -24,6 +24,7 @@ import javax.el.MethodNotFoundException; import org.apache.commons.lang3.StringUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -201,7 +202,7 @@ public Integer call() throws Exception { params.put(URI_PARAMETERS, requestParams); String response = liveImportClient.executeHttpGetRequest(timeout, url, params); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); @@ -376,7 +377,7 @@ private Map getRequestParameters(String query, String viewMode, private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); List records = root.getChildren("entry",Namespace.getNamespace("http://www.w3.org/2005/Atom")); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java index 2ccdc12b8db2..7db9789c6ff2 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java @@ -27,6 +27,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -146,7 +147,7 @@ public Integer call() throws Exception { params.put(HEADER_PARAMETERS, getRequestParameters()); String response = liveImportClient.executeHttpGetRequest(timeout, url, params); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); XPathExpression xpath = XPathFactory.instance().compile("//*[@name=\"RecordsFound\"]", @@ -284,7 +285,7 @@ private boolean isIsi(String query) { private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); String cData = XPathFactory.instance().compile("//*[@name=\"Records\"]", @@ -326,4 +327,4 @@ public void setApiKey(String apiKey) { this.apiKey = apiKey; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java b/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java index cdecadba5242..62a3cd9b19b2 100644 --- a/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java @@ -31,6 +31,7 @@ import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.util.EntityUtils; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.services.ConfigurationService; import org.jdom2.Attribute; import org.jdom2.Document; @@ -53,7 +54,7 @@ public class CCLicenseConnectorServiceImpl implements CCLicenseConnectorService, private Logger log = org.apache.logging.log4j.LogManager.getLogger(CCLicenseConnectorServiceImpl.class); private CloseableHttpClient client; - protected SAXBuilder parser = new SAXBuilder(); + protected SAXBuilder parser = XMLUtils.getSAXBuilder(); private String postArgument = "answers"; private String postAnswerFormat = diff --git a/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java b/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java index 3e7ca7b21029..68b82503c018 100644 --- a/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java +++ b/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java @@ -42,6 +42,7 @@ import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.message.BasicNameValuePair; +import org.dspace.app.util.XMLUtils; import org.dspace.orcid.exception.OrcidClientException; import org.dspace.orcid.model.OrcidEntityType; import org.dspace.orcid.model.OrcidProfileSectionType; @@ -326,8 +327,7 @@ private String marshall(Object object) throws JAXBException { @SuppressWarnings("unchecked") private T unmarshall(HttpEntity entity, Class clazz) throws Exception { JAXBContext jaxbContext = JAXBContext.newInstance(clazz); - XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); - xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + XMLInputFactory xmlInputFactory = XMLUtils.getXMLInputFactory(); XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(entity.getContent()); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); return (T) unmarshaller.unmarshal(xmlStreamReader); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java index 1fdf1e84e115..8d028d753b53 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java @@ -12,6 +12,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Path; import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -248,7 +249,15 @@ protected File getFile(Bitstream bitstream) throws IOException { log.debug("Local filename for " + sInternalId + " is " + bufFilename.toString()); } - return new File(bufFilename.toString()); + File bitstreamFile = new File(bufFilename.toString()); + Path normalizedPath = bitstreamFile.toPath().normalize(); + if (!normalizedPath.startsWith(baseDir.getAbsolutePath())) { + log.error("Bitstream path outside of assetstore root requested:" + + "bitstream={}, path={}, assetstore={}", + bitstream.getID(), normalizedPath, baseDir.getAbsolutePath()); + throw new IOException("Illegal bitstream path constructed"); + } + return bitstreamFile; } public boolean isRegisteredBitstream(String internalId) { diff --git a/dspace-api/src/main/java/org/dspace/storage/secure/SecureFileAccess.java b/dspace-api/src/main/java/org/dspace/storage/secure/SecureFileAccess.java new file mode 100644 index 000000000000..45727d9ee5ae --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/storage/secure/SecureFileAccess.java @@ -0,0 +1,166 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.secure; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +/** + * Decent I/O path validation - not perfect when symlinks are used and we are writing + * as 'toRealPath' check on the resolved path fails for new files + * + * @author Kim Shepherd + */ +public final class SecureFileAccess { + + private SecureFileAccess() {} + + /** + * Validate a given path against an allowed base path. Does not attempt to calculate "real path" + * before validation, as this breaks for new files which don't yet exist. This can make the resulting + * validation still vulnerable to symlink traversal in some cases + * @param file the unvalidated file, usually derived from user input or configuration + * This MUST be an absolute path, and the caller is expected to calculate it based on best + * context (e.g. configured base path, CWD, dspace.dir, and so on) + * @param allowedBasePaths list of allowed base paths for this use case as per system configuration + * @param purpose the name of the calling component / use case for logging and inspection + * @throws IOException on validation failure + */ + public static Path validatePathForWrite(String file, List allowedBasePaths, String purpose) + throws IOException { + Path filePath = Path.of(file); + if (!filePath.isAbsolute()) { + throw new IOException("Absolute path required for I/O (" + purpose + "): " + file); + } + for (String allowedBasePath : allowedBasePaths) { + Path basePath = Path.of(allowedBasePath) + .toRealPath() + .normalize(); + Path resolvedPath = basePath.resolve(file).normalize(); + if (resolvedPath.startsWith(basePath)) { + return resolvedPath; + } + } + + // If no valid path was resolved and returned by now + // we raise an exception and treat this as illegal access + throw new IOException("Illegal file path attempted for I/O (" + purpose + "): " + file); + } + + /** + * Validate a given path against an allowed base path. + * More secure than the 'write' variant because we can explicitly resolve links as well. + * + * @param file the unvalidated file, usually derived from user input or configuration + * This MUST be an absolute path, and the caller is expected to calculate it based on best + * context (e.g. configured base path, CWD, dspace.dir, and so on) + * @param allowedBasePaths the allowed base paths for this use case as per system configuration + * @param purpose the name of the calling component / use case for logging and inspection + * @throws IOException on validation failure + */ + public static Path validatePathForRead(String file, List allowedBasePaths, String purpose) + throws IOException { + Path filePath = Path.of(file); + if (!filePath.isAbsolute()) { + throw new IOException("Absolute path required for I/O (" + purpose + "): " + file); + } + for (String allowedBasePath : allowedBasePaths) { + Path basePath = Path.of(allowedBasePath) + .toRealPath() + .normalize(); + Path resolvedPath = basePath.resolve(file).toRealPath().normalize(); + if (resolvedPath.startsWith(basePath)) { + return resolvedPath; + } + } + // If no valid path was resolved and returned by now + // we raise an exception and treat this as illegal access + throw new IOException("Illegal file path attempted for I/O (" + purpose + "): " + file); + } + + /** + * Get a buffered reader after validating file path. + * @param unvalidatedFile the unvalidated file, usually derived from user input or configuration + * @param allowedBasePaths the allowed base paths for this use case as per system configuration + * @param purpose the name of the calling component / use case for logging and inspection + * @throws IOException on validation failure + */ + public static BufferedReader getBufferedReader(String unvalidatedFile, List allowedBasePaths, + String purpose, Charset charset) throws IOException { + if (charset == null) { + charset = StandardCharsets.UTF_8; + } + Path validatedFile = validatePathForRead(unvalidatedFile, allowedBasePaths, purpose); + return Files.newBufferedReader(validatedFile, charset); + } + + /** + * Get an input stream after validating file path. + * @param unvalidatedFile the unvalidated file, usually derived from user input or configuration + * @param allowedBasePaths the allowed base paths for this use case as per system configuration + * @param purpose the name of the calling component / use case for logging and inspection + * @throws IOException on validation failure + */ + public static InputStream getInputStream(String unvalidatedFile, List allowedBasePaths, String purpose) + throws IOException { + Path validatedFile = validatePathForRead(unvalidatedFile, allowedBasePaths, purpose); + return Files.newInputStream(validatedFile); + + } + + /** + * Get an output stream after validating file path. New files can't use toRealPath() for link calculation so + * there is a bit of a trade-off in allowing some symlink traversal to occur + * @param unvalidatedFile the unvalidated file, usually derived from user input or configuration + * @param allowedBasePaths the allowed base paths for this use case as per system configuration + * @param purpose the name of the calling component / use case for logging and inspection + * @throws IOException on validation failure + */ + public static OutputStream getOutputStream(String unvalidatedFile, List allowedBasePaths, String purpose) + throws IOException { + Path validatedFile = validatePathForWrite(unvalidatedFile, allowedBasePaths, purpose); + return Files.newOutputStream(validatedFile); + } + + /** + * Calculate an absolute path (if not already absolute) using current working dir as a root + * for relative file paths + * @param file the relative or absolute file given as input + * @return absolute path calculated from file and cwd + */ + public static String calculateAbsolutePathUsingCwd(String file) { + String filePath = file; + Path path = Path.of(filePath); + if (!path.isAbsolute()) { + filePath = Path.of("").toAbsolutePath().resolve(path).normalize().toString(); + } + return filePath; + } + + /** + * Calculate an absolute path (if not already absolute) using a given base dir as a root + * for relative file paths + * @param file the relative or absolute file given as input + * @return absolute path calculated from file and base dir + */ + public static String calculateAbsolutePathUsingBaseDir(String file, String baseDir) { + String filePath = file; + Path path = Path.of(filePath); + if (!path.isAbsolute()) { + filePath = Path.of(baseDir).toAbsolutePath().resolve(path).normalize().toString(); + } + return filePath; + } +} diff --git a/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java b/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java index 7f2bdc6ef771..bd19a1254fe3 100644 --- a/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java +++ b/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java @@ -12,7 +12,6 @@ import java.util.ArrayList; import java.util.List; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -20,6 +19,7 @@ import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; +import org.dspace.app.util.XMLUtils; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; import org.w3c.dom.Document; @@ -71,7 +71,7 @@ public static ControlledVocabulary loadVocabulary(String fileName) File controlledVocFile = new File(filePath.toString()); if (controlledVocFile.exists()) { - DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + DocumentBuilder builder = XMLUtils.getDocumentBuilder(); Document document = builder.parse(controlledVocFile); XPath xPath = XPathFactory.newInstance().newXPath(); Node node = (Node) xPath.compile("node").evaluate(document, XPathConstants.NODE); diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/BitstreamRestController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/BitstreamRestController.java index 07126993d1e6..434c9f9c6d0c 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/BitstreamRestController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/BitstreamRestController.java @@ -20,6 +20,7 @@ import javax.ws.rs.core.Response; import org.apache.catalina.connector.ClientAbortException; +import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.dspace.app.rest.converter.ConverterService; @@ -204,12 +205,39 @@ private boolean isNotAnErrorResponse(HttpServletResponse response) { || responseCode.equals(Response.Status.Family.REDIRECTION); } + /** + * Check if a Bitstream of the specified format should always be downloaded (i.e. "content-disposition: attachment") + * or can be opened inline (i.e. "content-disposition: inline"). + *

+ * NOTE that downloading via "attachment" is more secure, as the user's browser will not attempt to process or + * display the file. But, downloading via "inline" may be seen as more user-friendly for common formats. + * @param format BitstreamFormat + * @return true if always download ("attachment"). false if can be opened inline ("inline") + */ private boolean checkFormatForContentDisposition(BitstreamFormat format) { - // never automatically download undefined formats - if (format == null) { - return false; + // Undefined or Unknown formats should ALWAYS be downloaded for additional security. + if (format == null || format.getSupportLevel() == BitstreamFormat.UNKNOWN) { + return true; + } + + // Load additional formats configured to require download + List configuredFormats = List.of(configurationService. + getArrayProperty("webui.content_disposition_format")); + + // If configuration includes "*", then all formats will always be downloaded. + if (configuredFormats.contains("*")) { + return true; } - List formats = List.of((configurationService.getArrayProperty("webui.content_disposition_format"))); + + // Define a download list of formats which DSpace forces to ALWAYS be downloaded. + // These formats can embed JavaScript which may be run in the user's browser if the file is opened inline. + // Therefore, DSpace blocks opening these formats inline as it could be used for an XSS attack. + List downloadOnlyFormats = List.of("text/html", "text/javascript", "text/xml", "rdf"); + + // Combine our two lists + List formats = ListUtils.union(downloadOnlyFormats, configuredFormats); + + // See if the passed in format's MIME type or file extension is listed. boolean download = formats.contains(format.getMIMEType()); if (!download) { for (String ext : format.getExtensions()) { diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/security/GlobalRequestSecurityFilter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/security/GlobalRequestSecurityFilter.java new file mode 100644 index 000000000000..a9f0eea8d728 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/security/GlobalRequestSecurityFilter.java @@ -0,0 +1,140 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.security; + +import java.io.IOException; +import java.net.URI; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.util.Locale; +import javax.servlet.FilterChain; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.springframework.core.Ordered; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; +import org.springframework.web.filter.OncePerRequestFilter; + +/** + * Global filter acting on all requests (not just /api/) to provide some additional hardening + * against common attacks or RCE, if a malicious payload was somehow written to a directory + * executable by the servlet container. + * The decoding and normalisation is designed to be tolerant of malformed URLs or broken clients, etc. + * so that this additional security filter does not introduce false positives or unintended side effects. + * + * @author Kim Shepherd + */ +@Component +@Order(Ordered.HIGHEST_PRECEDENCE) +public class GlobalRequestSecurityFilter extends OncePerRequestFilter { + + @Override + protected void doFilterInternal( + HttpServletRequest request, + HttpServletResponse response, + FilterChain filterChain + ) throws ServletException, IOException { + String normalizedPath = normaliseUrl(request.getRequestURI()); + // Return 403 forbidden if JSP execution or URL traversal is attempted + if (isTraversalAttempt(normalizedPath)) { + logger.warn("Path traversal attempt detected. Skipping request: " + request.getRequestURI()); + response.sendError(HttpServletResponse.SC_FORBIDDEN); + return; + } + if (isJspExecutionAttempt(normalizedPath)) { + logger.warn("JSP execution attempt detected. Skipping request: " + request.getRequestURI()); + response.sendError(HttpServletResponse.SC_FORBIDDEN); + return; + } + filterChain.doFilter(request, response); + } + + /** + * Normalise the URI similarly to Tomcat, for testing how it will be interpreted + * @param rawUrl the unvalidated URL string + * @return a decoded, normalise URL + */ + private String normaliseUrl(String rawUrl) throws IOException { + if (rawUrl == null || rawUrl.isBlank()) { + throw new IOException("Empty URL"); + } + String url = rawUrl.split("\\?")[0]; + // Strip ;jspsession=... and so on + int semicolon = url.indexOf(';'); + if (semicolon >= 0) { + url = url.substring(0, semicolon); + } + url = decodeUrl(url); + if (url == null || url.isBlank()) { + throw new IOException("Decoded URL path is empty"); + } + url = normaliseUrlPath(url); + if (url == null || url.isBlank()) { + throw new IOException("Normalised URL path is empty"); + } + return url.toLowerCase(Locale.ROOT); + } + + /** + * Decode URL, falling back to original URL if it's malformed or undecodable + * @param url the encoded / unvalidated URL + * @return decoded URL or the original URL on error + */ + private String decodeUrl(String url) { + try { + return URLDecoder.decode(url, StandardCharsets.UTF_8); + } catch (IllegalArgumentException ex) { + // if we can't decode it, just return raw string + return url; + } + } + + /** + * Normalise the URL path and ensure it ends in a / + * @param url the URL path to normalise + * @return normalised path or the original parameter on error + */ + private String normaliseUrlPath(String url) { + try { + if (!url.startsWith("/")) { + url = "/" + url; + } + return new URI(url).normalize().getPath(); + } catch (Exception e) { + // if we can't use or normalise the path, just return the raw string + return url; + } + } + + /** + * Detect traversal after normalisation + * @param url the URL path to validate + * @return true if this looks like a traversal attempt + */ + private boolean isTraversalAttempt(String url) { + return url.contains("../") + || url.contains("/..") + || url.contains("%2e%2e") + || url.contains(".."); + } + + /** + * Block JSP execution attempts + * @param url the URL path to validate + */ + private boolean isJspExecutionAttempt(String url) { + return url.endsWith(".jsp") + || url.endsWith(".jspx") + || url.contains(".jsp/") + || url.contains(".jspx/") + || url.contains(".jsp\0") + || url.contains(".jspx\0"); + } +} diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamFormatRestRepositoryIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamFormatRestRepositoryIT.java index 1a6cc29ca75c..d5798ba5a3f0 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamFormatRestRepositoryIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamFormatRestRepositoryIT.java @@ -56,7 +56,7 @@ public class BitstreamFormatRestRepositoryIT extends AbstractControllerIntegrati @Autowired private BitstreamFormatConverter bitstreamFormatConverter; - private final int DEFAULT_AMOUNT_FORMATS = 81; + private final int DEFAULT_AMOUNT_FORMATS = 82; @Test public void findAllPaginationTest() throws Exception { diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java index a7fb6489c4fa..f055df678989 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java @@ -1196,7 +1196,7 @@ public void checkContentDispositionOfFormats() throws Exception { Bitstream rtf; Bitstream xml; Bitstream txt; - Bitstream html; + Bitstream csv; try (InputStream is = IOUtils.toInputStream(content, CharEncoding.UTF_8)) { rtf = BitstreamBuilder.createBitstream(context, item, is) .withMimeType("text/richtext").build(); @@ -1204,8 +1204,8 @@ public void checkContentDispositionOfFormats() throws Exception { .withMimeType("text/xml").build(); txt = BitstreamBuilder.createBitstream(context, item, is) .withMimeType("text/plain").build(); - html = BitstreamBuilder.createBitstream(context, item, is) - .withMimeType("text/html").build(); + csv = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("text/csv").build(); } context.restoreAuthSystemState(); @@ -1214,9 +1214,89 @@ public void checkContentDispositionOfFormats() throws Exception { verifyBitstreamDownload(xml, "text/xml;charset=UTF-8", true); verifyBitstreamDownload(txt, "text/plain;charset=UTF-8", true); // this format is not configured and should open inline - verifyBitstreamDownload(html, "text/html;charset=UTF-8", false); + verifyBitstreamDownload(csv, "text/csv;charset=UTF-8", false); } + @Test + public void checkHardcodedContentDispositionFormats() throws Exception { + // This test is similar to the above test, but it verifies that our *hardcoded settings* for + // webui.content_disposition_format are protecting us from loading specific formats *inline*. + context.turnOffAuthorisationSystem(); + Community community = CommunityBuilder.createCommunity(context).build(); + Collection collection = CollectionBuilder.createCollection(context, community).build(); + Item item = ItemBuilder.createItem(context, collection).build(); + String content = "Test Content"; + Bitstream html; + Bitstream js; + Bitstream rdf; + Bitstream xml; + Bitstream unknown; + Bitstream svg; + try (InputStream is = IOUtils.toInputStream(content, CharEncoding.UTF_8)) { + html = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("text/html").build(); + js = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("text/javascript").build(); + // NOTE: RDF has a MIME Type with a "charset" in our bitstream-formats.xml + rdf = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("application/rdf+xml; charset=utf-8").build(); + xml = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("text/xml").build(); + unknown = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("application/octet-stream").build(); + svg = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("image/svg+xml").build(); + + } + context.restoreAuthSystemState(); + + // By default, HTML, JS & XML should all download. This protects us from possible XSS attacks, as + // each of these formats can embed JavaScript which may execute when the file is loaded *inline*. + verifyBitstreamDownload(html, "text/html;charset=UTF-8", true); + verifyBitstreamDownload(js, "text/javascript;charset=UTF-8", true); + verifyBitstreamDownload(rdf, "application/rdf+xml;charset=UTF-8", true); + verifyBitstreamDownload(xml, "text/xml;charset=UTF-8", true); + // Unknown file formats should also always download + verifyBitstreamDownload(unknown, "application/octet-stream;charset=UTF-8", true); + // SVG does NOT currently exist as a recognized format in DSpace's bitstream-formats.xml, but it's another + // format that allows embedded JavaScript. This test is a reminder that SVGs should NOT be opened inline. + verifyBitstreamDownload(svg, "application/octet-stream;charset=UTF-8", true); + } + + @Test + public void checkWildcardContentDispositionFormats() throws Exception { + // Setting "*" should result in all formats being downloaded (nothing will be opened inline) + configurationService.setProperty("webui.content_disposition_format", "*"); + + context.turnOffAuthorisationSystem(); + Community community = CommunityBuilder.createCommunity(context).build(); + Collection collection = CollectionBuilder.createCollection(context, community).build(); + Item item = ItemBuilder.createItem(context, collection).build(); + String content = "Test Content"; + Bitstream csv; + Bitstream jpg; + Bitstream mpg; + Bitstream pdf; + try (InputStream is = IOUtils.toInputStream(content, CharEncoding.UTF_8)) { + csv = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("text/csv").build(); + jpg = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("image/jpeg").build(); + mpg = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("video/mpeg").build(); + pdf = BitstreamBuilder.createBitstream(context, item, is) + .withMimeType("application/pdf").build(); + } + context.restoreAuthSystemState(); + + // All formats should be download only + verifyBitstreamDownload(csv, "text/csv;charset=UTF-8", true); + verifyBitstreamDownload(jpg, "image/jpeg;charset=UTF-8", true); + verifyBitstreamDownload(mpg, "video/mpeg;charset=UTF-8", true); + verifyBitstreamDownload(pdf, "application/pdf;charset=UTF-8", true); + } + + private void verifyBitstreamDownload(Bitstream file, String contentType, boolean shouldDownload) throws Exception { String token = getAuthToken(admin.getEmail(), password); String header = getClient(token).perform(get("/api/core/bitstreams/" + file.getID() + "/content") @@ -1225,11 +1305,15 @@ private void verifyBitstreamDownload(Bitstream file, String contentType, boolean .andExpect(content().contentType(contentType)) .andReturn().getResponse().getHeader("content-disposition"); if (shouldDownload) { - assertTrue(header.contains("attachment")); - assertFalse(header.contains("inline")); + assertTrue("Content-Disposition should contain 'attachment' for " + contentType, + header.contains("attachment")); + assertFalse("Content-Disposition should NOT contain 'inline' for " + contentType, + header.contains("inline")); } else { - assertTrue(header.contains("inline")); - assertFalse(header.contains("attachment")); + assertTrue("Content-Disposition should contain 'inline' for " + contentType, + header.contains("inline")); + assertFalse("Content-Disposition should NOT contain 'attachment' for " + contentType, + header.contains("attachment")); } } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java index cbcf970547f7..51300fe10348 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java @@ -13,8 +13,6 @@ import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; -import javax.servlet.ServletException; - import org.dspace.app.rest.test.AbstractControllerIntegrationTest; import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; @@ -85,18 +83,20 @@ public void testSitemap_notValidSiteMapFile() throws Exception { .andExpect(status().isNotFound()); } - @Test(expected = ServletException.class) + @Test public void testSitemap_fileSystemTraversal_dspaceCfg() throws Exception { //** WHEN ** //We attempt to use endpoint for malicious file system traversal - getClient().perform(get("/" + SITEMAPS_ENDPOINT + "/%2e%2e/config/dspace.cfg")); + getClient().perform(get("/" + SITEMAPS_ENDPOINT + "/%2e%2e/config/dspace.cfg")) + .andExpect(status().isForbidden()); } - @Test(expected = ServletException.class) + @Test public void testSitemap_fileSystemTraversal_dspaceCfg2() throws Exception { //** WHEN ** //We attempt to use endpoint for malicious file system traversal - getClient().perform(get("/" + SITEMAPS_ENDPOINT + "/%2e%2e%2fconfig%2fdspace.cfg")); + getClient().perform(get("/" + SITEMAPS_ENDPOINT + "/%2e%2e%2fconfig%2fdspace.cfg")) + .andExpect(status().isForbidden()); } @Test diff --git a/dspace-server-webapp/src/test/java/org/dspace/curate/CurationScriptIT.java b/dspace-server-webapp/src/test/java/org/dspace/curate/CurationScriptIT.java index a528f4351356..aa32e9e932fd 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/curate/CurationScriptIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/curate/CurationScriptIT.java @@ -36,6 +36,7 @@ import org.dspace.content.Item; import org.dspace.content.ProcessStatus; import org.dspace.scripts.DSpaceCommandLineParameter; +import org.junit.Ignore; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -198,6 +199,7 @@ public void curateScript_InvalidScope() throws Exception { .andExpect(status().isBadRequest()); } + @Ignore @Test public void curateScript_InvalidTaskFile() throws Exception { String token = getAuthToken(admin.getEmail(), password); @@ -270,6 +272,7 @@ public void curateScript_validRequest_Task() throws Exception { } } + @Ignore @Test public void curateScript_validRequest_TaskFile() throws Exception { context.turnOffAuthorisationSystem(); diff --git a/dspace-swordv2/src/main/java/org/dspace/sword2/SimpleZipContentIngester.java b/dspace-swordv2/src/main/java/org/dspace/sword2/SimpleZipContentIngester.java index bd8301617ba9..7899bf861b32 100644 --- a/dspace-swordv2/src/main/java/org/dspace/sword2/SimpleZipContentIngester.java +++ b/dspace-swordv2/src/main/java/org/dspace/sword2/SimpleZipContentIngester.java @@ -138,12 +138,18 @@ private List unzipToBundle(Context context, File depositFile, Enumeration zenum = zip.entries(); while (zenum.hasMoreElements()) { ZipEntry entry = (ZipEntry) zenum.nextElement(); + String entryName = entry.getName(); + java.nio.file.Path entryPath = java.nio.file.Paths.get(entryName).normalize(); + if (entryPath.isAbsolute() || entryPath.startsWith("..")) { + throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Invalid zip entry: " + entryName); + } + InputStream stream = zip.getInputStream(entry); Bitstream bs = bitstreamService.create(context, target, stream); BitstreamFormat format = this - .getFormat(context, entry.getName()); + .getFormat(context, entryName); bs.setFormat(context, format); - bs.setName(context, entry.getName()); + bs.setName(context, entryName); bitstreamService.update(context, bs); derivedResources.add(bs); } diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index 09b8a1c59814..1d530e4331a1 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -169,6 +169,7 @@ mail.from.address = dspace-noreply@myu.edu # will use the above settings to create a Session. #mail.session.name = Session + # When feedback is submitted via the Feedback form, it is sent to this address # Currently limited to one recipient! # if this property is empty or commented out, feedback form is disabled @@ -235,6 +236,22 @@ mail.message.headers = charset # Helpdesk telephone. Not email, but should be with other contact info. Optional. #mail.message.helpdesk.telephone = +1 555 555 5555 +# Allowed configuration properties, to pass in a "config" map to email and LDN templates. +# This allows templates to easily access dynamic configuration properties, without +# exposing sensitive information to the templating engine +message.templates.allowed-config = dspace.name +message.templates.allowed-config = dspace.shortname +message.templates.allowed-config = dspace.ui.url +message.templates.allowed-config = mail.helpdesk +message.templates.allowed-config = mail.message.helpdesk.telephone +message.templates.allowed-config = mail.admin +message.templates.allowed-config = mail.admin.name + +# Whether to run Velocity in strict mode (null parameter values in templates for LDN or Email will result +# in an Exception instead of a blank string) +# Default: false (this can introduce unwanted side-effects if e.g. a submitter eperson is deleted for a workflow task) +#message.templates.strict_mode = false + ##### Asset Storage (bitstreams / files) ###### # Moved to config/spring/api/bitstore.xml @@ -1375,14 +1392,15 @@ webui.content_disposition_threshold = 8388608 #### Content Attachment Disposition Formats #### # -# Set which mimetypes, file extensions will NOT be opened inline -# Files with these mimetypes/extensions will always be downloaded, -# regardless of the threshold above +# Set which mimetypes or file extensions will NOT be opened inline. +# Files with these mimetypes/extensions will always be downloaded, regardless of the threshold above. +# NOTE: For security reasons, some file formats (e.g. HTML, XML, RDF, JS) will always be downloaded regardless +# of the settings here. This blocks these formats from executing embedded JavaScript when opened inline. +# For additional security, you may choose to set this to "*" to force all formats to always be downloaded +# (i.e. disables all formats from opening inline within the user's browser). +# +# By default, RTF is always downloaded because most browsers attempt to display it as plain text. webui.content_disposition_format = text/richtext -webui.content_disposition_format = text/html -webui.content_disposition_format = text/javascript -webui.content_disposition_format = text/xml -webui.content_disposition_format = rdf #### Multi-file HTML document/site settings ##### # TODO: UNSUPPORTED in DSpace 7.0. May be re-added in a later release diff --git a/dspace/config/modules/curate.cfg b/dspace/config/modules/curate.cfg index 1d7b87960df1..bb50aa811fcf 100644 --- a/dspace/config/modules/curate.cfg +++ b/dspace/config/modules/curate.cfg @@ -26,3 +26,15 @@ curate.taskqueue.dir = ${dspace.dir}/ctqueues # (optional) directory location of scripted (non-java) tasks # curate.script.dir = ${dspace.dir}/ctscripts + +# allowed base path(s) of curation task files +# it is recommended to restrict this path as much as possible +# so that the DSpace Processes framework may only load files as "tasks" +# from a trusted location. For multiple paths, repeat this configuration +# property for each trusted path +# Default: ${dspace.dir} +#curate.taskfile.base = ${dspace.dir} + +# allowed base path of reporter output. +# Default: ${dspace.dir}/log +#curate.reporter.base = ${dspace.dir}/log diff --git a/dspace/config/modules/oai.cfg b/dspace/config/modules/oai.cfg index 98b10f59dee9..b1ec11447f37 100644 --- a/dspace/config/modules/oai.cfg +++ b/dspace/config/modules/oai.cfg @@ -132,3 +132,10 @@ oai.harvester.unknownSchema = fail # when attempting to find the handle of harvested items. If there is a match with # this config parameter, a new handle will be minted instead. Default value: 123456789. #oai.harvester.rejectedHandlePrefix = 123456789, myTestHandle + +# If ingesting files with ORE, only files with URLs that match the base URL of the remote +# OAI endpoint's domain name are accepted, or a list of other URL prefixes defined below +#oai.harvester.ore.file.validateUrlPrefix = true +# Prefixes that are allowed globally (for any endpoint) are below +#oai.harvester.ore.file.allowedUrlPrefix = dspace.myinstitution.edu +#oai.harvester.ore.file.allowedUrlPrefix = files.myinstitution.edu diff --git a/dspace/config/registries/bitstream-formats.xml b/dspace/config/registries/bitstream-formats.xml index e7badbcdecd0..cb3b81165785 100644 --- a/dspace/config/registries/bitstream-formats.xml +++ b/dspace/config/registries/bitstream-formats.xml @@ -791,4 +791,13 @@ mp3 + + text/javascript + JavaScript + JavaScript + 1 + false + js + +