file corrupted when I post it to the servlet using GZIPOutputStream

I tried to modify @BalusC excellent tutorial here to send gziped compressed files. This is a working java class :

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPOutputStream;

public final class NetworkService {

    // *** EDIT THOSE AS APROPRIATE
    private static final String FILENAME = "C:/Dropbox/TMP.txt";
    private static final String URL =
            "http://192.168.1.64:8080/DataCollectionServlet/";
    // *** END EDIT
    private static final CharSequence CRLF = "rn";
    private static boolean isServerGzip = true; // ***
    private static String charsetForMultipartHeaders = "UTF-8";

    public static void main(String[] args) {
        HttpURLConnection connection = null;
        OutputStream serverOutputStream = null;
        try {
            File file = new File(FILENAME);
            final String boundary = Long
                    .toHexString(System.currentTimeMillis());
            connection = connection(true, boundary);
            serverOutputStream = connection.getOutputStream();
            try {
                flushMultiPartData(file, serverOutputStream, boundary);
            } catch (IOException e) {}
            System.out.println(connection.getResponseCode()); // 200
        } catch (IOException e) {
            // Network unreachable : not connected
            // No route to host : probably on an encrypted network
            // Connection timed out : Server DOWN
        } finally {
            if (connection != null) connection.disconnect();
        }
    }

    private static HttpURLConnection connection(boolean isMultiPart,
            String boundary) throws MalformedURLException, IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(URL)
                .openConnection();
        connection.setDoOutput(true); // triggers POST
        connection.setUseCaches(false); // *** no difference
        connection.setRequestProperty("Connection", "Keep-Alive");
        connection.setRequestProperty("User-Agent",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.3) "
                + "Gecko/20100401"); // *** tried others no difference
        connection.setChunkedStreamingMode(1024); // *** no difference
        if (isMultiPart) {
            if (boundary == null || "".equals(boundary.trim()))
                throw new IllegalArgumentException("Boundary can't be "
                    + ((boundary == null) ? "null" : "empty"));
            connection.setRequestProperty("Content-Type",
                "multipart/form-data; boundary=" + boundary);
        }
        return connection;
    }

    // =========================================================================
    // Multipart
    // =========================================================================
    private static void flushMultiPartData(File file,
            OutputStream serverOutputStream, String boundary)
            throws IOException {
        PrintWriter writer = null;
        try {
            // true = autoFlush, important!
            writer = new PrintWriter(new OutputStreamWriter(serverOutputStream,
                    charsetForMultipartHeaders), true);
            appendBinary(file, boundary, writer, serverOutputStream);
            // End of multipart/form-data.
            writer.append("--" + boundary + "--").append(CRLF);
        } finally {
            if (writer != null) writer.close();
        }
    }

    private static void appendBinary(File file, String boundary,
            PrintWriter writer, OutputStream output)
            throws FileNotFoundException, IOException {
        // Send binary file.
        writer.append("--" + boundary).append(CRLF);
        writer.append(
            "Content-Disposition: form-data; name="binaryFile"; filename=""
                + file.getName() + """).append(CRLF);
        writer.append(
            "Content-Type: " // ***
                + ((isServerGzip) ? "application/gzip" : URLConnection
                        .guessContentTypeFromName(file.getName())))
                .append(CRLF);
        writer.append("Content-Transfer-Encoding: binary").append(CRLF);
        writer.append(CRLF).flush();
        InputStream input = null;
        OutputStream output2 = output;
        if (isServerGzip) {
            output2 = new GZIPOutputStream(output);
        }
        try {
            input = new FileInputStream(file);
            byte[] buffer = new byte[1024]; // *** tweaked, no difference
            for (int length = 0; (length = input.read(buffer)) > 0;) {
                output2.write(buffer, 0, length);
            }
            output2.flush(); // Important! Output cannot be closed. Close of
            // writer will close output as well.
        } finally {
            if (input != null) try {
                input.close();
            } catch (IOException logOrIgnore) {}
        }
        writer.append(CRLF).flush(); // CRLF is important! It indicates end of
        // binary boundary.
    }
}

You have to edit FILENAME and URL fields and set up a servlet in the URL – its doPost() method is :

@Override
protected void doPost(HttpServletRequest req, HttpServletResponse resp)
        throws ServletException, IOException {
    Collection<Part> parts = req.getParts();
    for (Part part : parts) {
        File save = new File(uploadsDirName, getFilename(part) + "_"
            + System.currentTimeMillis() + ".zip");
        final String absolutePath = save.getAbsolutePath();
        log.debug(absolutePath);
        part.write(absolutePath);
        sc.getRequestDispatcher(DATA_COLLECTION_JSP).forward(req, resp);
    }
}

Now when isServerGzip field is set to true the FILENAME is compressed alright and send to the server but when I try to extract it it is corrupted (I use 7z on windows which opens the gzip file as archive but when I try to extract the file inside the gzip archive it says it is corrupted – though it does extract the (corrupted indeed) file). Tried with various files – the larger ones end up corrupted at some point the smaller ones extract as empty – the reported size of the larger files inside the archive is much larger than the real size while of the smaller ones 0. I marked the parts that need attention as // ***. I might miss some connection configuration or my way of gzipping the stream might be plain wrong or…?
Tried tweaking connection properties, the buffers, caches etc to no avail

Answer

You need to call

((GZIPOutputStream)output2).finish();

before flushing. See the javadoc here. It states

Finishes writing compressed data to the output stream without closing the underlying stream. Use this method when applying multiple filters in succession to the same output stream.

Which is what you are doing. So

for (int length = 0; (length = input.read(buffer)) > 0;) 
    output2.write(buffer, 0, length);
}
((GZIPOutputStream)output2).finish(); //Write the compressed parts
// obviously make sure output2 is truly GZIPOutputStream
output2.flush(); // 

On the subject of applying multiple filters in succession to the same output stream, this is how I understand it:

You have an OutputStream, that is a socket connection, to the HTTP server. The HttpUrlConnection writes the headers and then you write the body directly. In this situation (multipart), you send the boundary and headers as unzipped bytes, the zipped file content, and then again the boundary. So the stream looks like this in the end:

                            start writing with GZIPOutputStream
                                          v
    |---boundary---|---the part headers---|---gzip encoded file content bytes---|---boundary---|
    ^                                                                           ^
write directly with PrintWriter                                      use PrintWriter again

So you can see how you have different parts written in succession with different filters. Think of the PrintWriter as an unfiltered filter, anything you give it is written directly. The GZIPOutputStream is a gzip filter, it encodes (gzips) the bytes it’s given.

As for the source code, look in your Java JDK installation, you should have a src.zip file that contains the public source code, java.lang*, java.util.*, java.io.*, javax.*, etc.

Leave a Reply

Your email address will not be published. Required fields are marked *