« tomcat user runs fedora | Main | ESciDoc TOC (Table Of Contents) »

Config file change to make DSPACE properly handle Unicode filenames

On strip1 the AgEcon instance was not properly downloading files that had non-ASCII file names. That is, it was not handling Unicode characters correctly. This was corrected by fixing a config file. File to edit on strip1: tu nano tomcat/conf/server.xml Old bad line: <!-- Define an AJP 1.3 Connector on port 8009 --> <Connector port="8009" UIEncoding="UTF-8" tomcatAuthentication="false" enableLookups="false" redirectPort="8443" protocol="AJP/1.3" /> New fixed line: <!-- Define an AJP 1.3 Connector on port 8009 --> <Connector port="8009" URIEncoding="UTF-8" tomcatAuthentication="false" enableLookups="false" redirectPort="8443" protocol="AJP/1.3" />
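i.e. change the misspelled attribute UIEncoding to URIEncoding. Tomcat does not recognize UIEncoding, so the connector kept using its default URI encoding (ISO-8859-1 on older Tomcat versions) and mis-decoded the percent-encoded UTF-8 bytes in the requested file name.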

Things learned along the way:
1) Location of constant to encode strings as UTF-8 in DSPACE
./src/org/dspace/core/Constants.java:209: public static final String DEFAULT_ENCODING = "UTF-8";
2) Servlet that does downloads of PDFs
<servlet> <servlet-name>bitstream</servlet-name> <servlet-class>org.dspace.app.webui.servlet.BitstreamServlet</servlet-class> </servlet> line 165 of ./etc/dspace-web.xml
3) Code from ./src/org/dspace/app/webui/servlet/BitstreamServlet.java that does download: protected void doDSGet(Context context, HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException, SQLException, AuthorizeException { Item item = null; Bitstream bitstream = null; System.out.println("In dspace proper"); // Get the ID from the URL String idString = request.getPathInfo(); String handle = ""; String sequenceText = ""; String filename = null; int sequenceID; System.out.println("1 idString " + idString ); // Parse 'identifier' and 'sequence' (bitstream seq. number) out // of remaining URL path, which is typically of the format: // {identifier}/{sequence}/{bitstream-name} // But since the bitstream name MAY have any number of "/"s in // it, we scan from the start to pick out the sequence: String [] pathArray = HandleManager.splitIdentifier(idString); handle = pathArray[0]; System.out.println("1.5 handle " + handle ); String extraInfo = pathArray[1]; System.out.println("2 extraInfo " + extraInfo ); if(extraInfo != null) { // Remove leading slash if any: if(extraInfo.startsWith("/")) { extraInfo = extraInfo.substring(1); } // The sequence is before the first slash, everything // else is part of the bitstream-name. 
int slashIndex = extraInfo.indexOf('/'); if(slashIndex != -1) { sequenceText = extraInfo.substring(0,slashIndex); filename = extraInfo.substring(slashIndex+1); } } System.out.println("3 sequenceText " + sequenceText ); try { sequenceID = Integer.parseInt(sequenceText); System.out.println("4 sequenceID " + sequenceID ); } catch (NumberFormatException nfe) { sequenceID = -1; } // Now try and retrieve the item DSpaceObject dso = HandleManager.resolveToObject(context, handle); // Make sure we have valid item and sequence number if (dso != null && dso.getType() == Constants.ITEM && sequenceID >= 0) { item = (Item) dso; if (item.isWithdrawn()) { log.info(LogManager.getHeader(context, "view_bitstream", "handle=" + handle + ",withdrawn=true")); JSPManager.showJSP(request, response, "/tombstone.jsp"); return; } boolean found = false; Bundle[] bundles = item.getBundles(); for (int i = 0; (i < bundles.length) && !found; i++) { Bitstream[] bitstreams = bundles[i].getBitstreams(); for (int k = 0; (k < bitstreams.length) && !found; k++) { if (sequenceID == bitstreams[k].getSequenceID()) { bitstream = bitstreams[k]; found = true; } } } } if (bitstream == null || filename == null || !filename.equals(bitstream.getName())) { // No bitstream found or filename was wrong -- ID invalid log.info(LogManager.getHeader(context, "invalid_id", "path=" + idString)); JSPManager.showInvalidIDError(request, response, idString, Constants.BITSTREAM); return; } // log.fatal(LogManager.getHeader(context, "view_bitstream", // "bitstream_id=" + bitstream.getID())); // Modification date // TODO: Currently the date of the item, since we don't have dates // for files response.setDateHeader("Last-Modified", item.getLastModified().getTime()); // Check for if-modified-since header long modSince = request.getDateHeader("If-Modified-Since"); if (modSince != -1 && item.getLastModified().getTime() < modSince) { // Item has not been modified since requested date, // hence bitstream has not; return 304 
response.setStatus(HttpServletResponse.SC_NOT_MODIFIED); return; } // Pipe the bits InputStream is = bitstream.retrieve(); // Set the response MIME type response.setContentType(bitstream.getFormat().getMIMEType()); // Response length response.setHeader("Content-Length", String.valueOf(bitstream.getSize())); Utils.bufferedCopy(is, response.getOutputStream()); is.close(); response.getOutputStream().flush(); } 4) HTML generated for download before the fix: <tr><td headers="t1" class="standard">12_Felföldi_Apstract.pdf</td><td headers="t2" class="standard"></td><td headers="t3" class="standard">77Kb</td><td headers="t4" class="standard">PDF</td><td class="standard" align="center"><a target="_blank" href="/bitstream/55410/3/12_Felf%c3%b6ldi_Apstract.pdf">View/Open</a></td></tr> Note: the displayed name 12_Felföldi_Apstract.pdf != the percent-encoded UTF-8 name in the link, 12_Felf%c3%b6ldi_Apstract.pdf. The servlet's filename.equals(bitstream.getName()) check above only succeeds if the container decodes the request path as UTF-8; otherwise the request is reported as an invalid ID. 5) The JSP tag class that generates the above HTML is: ./src/org/dspace/app/webui/jsptag/ItemTag.java

Post a comment

(If you haven't left a comment here before, you may need to be approved by the site owner before your comment will appear. Until then, it won't appear on the entry. Thanks for waiting.)