001package bradleyross.common;
002import java.io.*;
003import java.net.*;
/**
 * Provide a set of methods that will help in processing
 * the contents of web pages.
 * <p>Some methods still need to be added for escaping and unescaping
 *    character strings.  For text that appears in web pages, this
 *    would include the handling of ampersands, less than, and greater
 *    than symbols.</p>
 * <p>It may also be necessary to include something for the handling of material
 *    inside quoted strings (both single and double quotes).</p>
 *
 * @author Bradley Ross
 */
public class httpHelper
{
        /**
         * Controls amount of diagnostic output.
         * <p>A value greater than zero makes {@link #readHttpPage(String, int, String)}
         *    print the number of characters it read to <code>System.out</code>.</p>
         *
         * @see #getDebugLevel()
         * @see #setDebugLevel(int)
         */
        protected int debugLevel = 0;
        /**
         * Getter for debugLevel.
         *
         * @return Value of debugLevel
         *
         * @see #debugLevel
         */
        public int getDebugLevel()
        { return debugLevel; }
        /**
         * Setter for debugLevel.
         * @param value Value for debugLevel
         * @see #debugLevel
         */
        public void setDebugLevel(int value)
        { debugLevel = value; }
        /**
         * Copy the contents of a web page into a String object.
         * <p>A single <code>GET</code> request is sent over a plain socket with a
         *    <code>Connection: close</code> header, so that the server ends the
         *    connection after the response.  The status line and headers are
         *    read and discarded; the only header that is interpreted is
         *    <code>Content-Length</code>.</p>
         * <ul>
         * <li>When a valid, non-negative <code>Content-Length</code> is present,
         *     at most that many <em>bytes</em> are read and then decoded using
         *     the platform default character set.</li>
         * <li>Otherwise the body is read line by line until the connection is
         *     closed, and each line is returned terminated by a carriage
         *     return / line feed pair.</li>
         * </ul>
         * <p>Chunked transfer encoding is not decoded by this method.</p>
         * <p>Reads time out after ten seconds.  It may be advantageous to have
         *    these routines able to time out in the same way as the methods in
         *    {@link bradleyross.library.helpers.FileHelpers}.</p>
         * @param host Domain name or IP address of web server (Domain name only.
         * e.g. www.cnn.com)
         * @param port Port number for web server
         * @param fileName Directory and name for web page (No leading slash.
         *   e.g. index.html)
         * @return Body of the HTTP response
         * @throws java.io.IOException if the connection cannot be opened or the
         *   response cannot be read; the original exception is attached as the cause
         * @see java.net.Socket
         */
        public String readHttpPage(String host, int port, String fileName)
        throws java.io.IOException
        {
                String newLine;
                Socket sock = null;
                /* -1 means that no usable Content-Length header was received. */
                long contentLength = -1;
                long charactersRead = 0;
                String body = "";
                try
                {
                        sock = new Socket(host, port);
                        sock.setSoTimeout(10000);
                        PrintWriter output = new PrintWriter(sock.getOutputStream());
                        /*
                         * The response is read as bytes because Content-Length
                         * counts bytes, not decoded characters.
                         */
                        InputStream input = new BufferedInputStream(sock.getInputStream());
                        String location = "/" + fileName;
                        /* The Host header carries the port when it is not the HTTP default. */
                        String hostHeader = (port == 80) ? host : host + ":" + port;
                        output.print("GET " + location + " HTTP/1.1\r\n");
                        output.print("HOST: " + hostHeader + "\r\n");
                        /*
                         * Ask the server to close the connection after the
                         * response so that reading to end of stream terminates.
                         */
                        output.print("Connection: close\r\n");
                        output.print("\r\n");
                        output.flush();
                        /*
                         * Read headers
                         *
                         * The headers have a header name, a colon character,
                         * and then the value for that header.  The only
                         * header affecting the treatment of the page is
                         * content-length.  The status line contains no colon
                         * and is therefore skipped.
                         */
                        while ((newLine = readHeaderLine(input)) != null)
                        {
                                if (newLine.trim().length() == 0) { break; }
                                int colon = newLine.indexOf(':');
                                if (colon > 0)
                                {
                                        String type = newLine.substring(0, colon).trim();
                                        String value = newLine.substring(colon + 1).trim();
                                        if (type.equalsIgnoreCase("content-length"))
                                        {
                                                try
                                                {
                                                        long parsed = Long.parseLong(value);
                                                        if (parsed >= 0) { contentLength = parsed; }
                                                }
                                                catch (NumberFormatException ignored)
                                                {
                                                        /*
                                                         * Deliberate: a malformed length is treated as
                                                         * absent and the body is read to end of stream.
                                                         */
                                                }
                                        }
                                }
                        }
                        /*
                         * Read body of transaction response
                         */
                        if (contentLength < 0)
                        {
                                BufferedReader reader =
                                        new BufferedReader(new InputStreamReader(input));
                                StringBuffer text = new StringBuffer();
                                while ((newLine = reader.readLine()) != null)
                                { text.append(newLine).append("\r\n"); }
                                body = text.toString();
                        }
                        else
                        {
                                ByteArrayOutputStream collected = new ByteArrayOutputStream();
                                byte buffer[] = new byte[1024];
                                long remaining = contentLength;
                                while (remaining > 0)
                                {
                                        /* Never request more than the declared length. */
                                        int wanted = (int) Math.min((long) buffer.length, remaining);
                                        int packetLength = input.read(buffer, 0, wanted);
                                        /* The server closed the connection early: keep what arrived. */
                                        if (packetLength < 0) { break; }
                                        collected.write(buffer, 0, packetLength);
                                        remaining -= (long) packetLength;
                                }
                                /* Decoded with the platform default character set. */
                                body = collected.toString();
                        }
                        charactersRead = (long) body.length();
                        if (debugLevel > 0)
                        {System.out.print(" " + Long.toString(charactersRead) +
                                        " characters read "); }
                } /*  End of try block */
                catch (IOException e)
                {
                        System.out.println("Exception encountered in readHttpPage");
                        System.out.println(e.getClass().getName() + " : " +
                                        e.getMessage());
                        e.printStackTrace(System.out);
                        /* Keep the original exception as the cause for callers. */
                        throw new java.io.IOException("Unable to read HTTP page", e);
                }
                finally
                {
                        /* Release the socket on both the success and the failure path. */
                        if (sock != null)
                        {
                                try
                                { sock.close(); }
                                catch (IOException ignored)
                                { /* Nothing useful can be done if the close fails. */ }
                        }
                }
                return body;
        } /* End of method readHttpPage */
        /**
         * Read one line of the response header directly from the byte stream.
         * <p>The line ends at a line feed; a carriage return immediately before
         *    the line feed is removed.  Bytes are mapped to characters
         *    using ISO-8859-1.</p>
         * @param in Stream positioned at the start of a line
         * @return Contents of the line without its terminator, or null when the
         *   end of the stream was reached before any byte was read
         * @throws IOException if reading from the stream fails
         */
        private static String readHeaderLine(InputStream in)
        throws IOException
        {
                int next = in.read();
                if (next < 0) { return null; }
                ByteArrayOutputStream line = new ByteArrayOutputStream();
                while (next >= 0 && next != '\n')
                {
                        line.write(next);
                        next = in.read();
                }
                String text = line.toString("ISO-8859-1");
                if (text.endsWith("\r"))
                { text = text.substring(0, text.length() - 1); }
                return text;
        } /* End of method readHeaderLine */
} /* End of class httpHelper */
137