001package bradleyross.common;
002import java.io.*;
003import java.net.*;
004import java.io.IOException;
005/**
006 * Provide a set of methods that will help in processing
007 * the contents of web pages.
008 * <p>Some methods need to be added for escaping and unescaping
009 *    character strings.  For text that appears in web pages, this
010 *    would include the handling of ampersands, less than, and greater
011 *    than symbols</p>
012 * <p>It may also be necessary to include something for the handling of material
013 *    inside quoted strings.  (both single and double quotes)</p>
014 * 
015 * @author Bradley Ross
016 */
public class httpHelper
{
        /** 
         * Controls amount of diagnostic output.
         * <p>When greater than zero, {@link #readHttpPage(String, int, String)}
         *    prints the number of characters it is returning to standard output.</p>
         * 
         * @see #getDebugLevel()
         * @see #setDebugLevel(int)
         */
        protected int debugLevel = 0;
        /**
         * Getter for debugLevel.
         * 
         * @return Value of debugLevel
         * 
         * @see #debugLevel
         */
        public int getDebugLevel()
        { return debugLevel; }
        /**
         * Setter for debugLevel.
         * @param value Value for debugLevel
         * @see #debugLevel
         */
        public void setDebugLevel(int value)
        { debugLevel = value; }
        /**
         * Copy the contents of a web page into a String object.
         * <p>A plain-socket HTTP/1.1 GET request is sent with a
         *    <code>Connection: close</code> header so that the server ends the
         *    stream once the response is complete.  The socket read timeout
         *    is ten seconds.</p>
         * <p>Limitations:</p>
         * <ul>
         * <li>The status line is not examined; the body of an error response
         *     is returned like any other page.</li>
         * <li>Content-Length is the only response header that is acted upon.
         *     Chunked transfer encoding is not decoded.</li>
         * <li>Request and response are encoded and decoded using the platform
         *     default character set.</li>
         * <li>If the response has no usable Content-Length, the body is read
         *     line by line and each line is terminated with CR LF in the
         *     returned string, whatever line terminator the server used.</li>
         * </ul>
         * <p>It may be advantageous to have these routines able to
         *    time out in the same way as the methods in
         *    {@link bradleyross.library.helpers.FileHelpers}.</p>
         * @param host Domain name or IP address of web server (Domain name only. 
         * e.g. www.cnn.com)
         * @param port Port number for web server
         * @param fileName Directory and name for web page (No leading slash.
         *   e.g. index.html)
         * @return string containing contents of web page
         * @throws java.io.IOException if IO error occurs.  The message is always
         *   "Unable to read HTTP page" and the underlying exception is
         *   available through getCause().
         * 
         * @see java.net.Socket
         */
        public String readHttpPage(String host, int port, String fileName)
        throws java.io.IOException
        {
                Socket sock = null;
                try 
                {
                        sock = new Socket(host, port);
                        sock.setSoTimeout(10000);
                        PrintWriter output = new PrintWriter(sock.getOutputStream());
                        BufferedReader input = 
                                new BufferedReader(new InputStreamReader(sock.getInputStream()));
                        sendRequest(output, host, port, fileName);
                        long contentLength = readContentLength(input);
                        String body = readBody(input, contentLength);
                        if (debugLevel > 0)
                        {
                                System.out.print(" " + Long.toString((long) body.length()) + 
                                        " characters read ");
                        }
                        return body;
                }
                catch (IOException e)
                {
                        /*
                         * The diagnostics on standard output are kept for
                         * existing users; the original exception is now also
                         * attached as the cause so that callers can inspect it.
                         */
                        System.out.println("Exception encountered in readHttpPage");
                        System.out.println(e.getClass().getName() + " : " +
                                e.getMessage());
                        e.printStackTrace(System.out);
                        throw new java.io.IOException("Unable to read HTTP page", e);
                }
                finally
                {
                        /*
                         * Release the socket on every path, including failures
                         * that occur after the connection was established.
                         */
                        if (sock != null)
                        {
                                try
                                { sock.close(); }
                                catch (IOException ignored)
                                {
                                        /*
                                         * A failure while closing must not hide
                                         * the page that was read or replace the
                                         * exception already being reported.
                                         */
                                }
                        }
                }
        } /* End of method readHttpPage */
        /**
         * Write the GET request and its headers to the server.
         *
         * @param output Writer wrapped around the socket output stream
         * @param host Host name used for the Host header
         * @param port Port number, added to the Host header unless it is 80
         * @param fileName Path of the page without the leading slash
         * @throws java.io.IOException if the writer reports a write failure
         */
        private void sendRequest(PrintWriter output, String host, int port, String fileName)
        throws java.io.IOException
        {
                /* The Host header carries the port unless it is the HTTP default. */
                String hostHeader = (port == 80) ? host : host + ":" + port;
                /* No white space is allowed between the HTTP version and CR LF. */
                output.print("GET /" + fileName + " HTTP/1.1\r\n");
                output.print("Host: " + hostHeader + "\r\n");
                /*
                 * Ask the server to close the connection after the response.
                 * Otherwise a persistent connection never reaches end of
                 * stream and reading only stops when the socket times out.
                 */
                output.print("Connection: close\r\n");
                output.print("\r\n");
                output.flush();
                /* PrintWriter hides write failures; ask for them explicitly. */
                if (output.checkError())
                { throw new java.io.IOException("Unable to send HTTP request"); }
        }
        /**
         * Consume the status line and the response headers.
         * <p>Reading stops after the blank line that ends the headers, or at
         *    end of stream.  Lines without a colon after the first character,
         *    such as the status line, are skipped.</p>
         *
         * @param input Reader positioned at the start of the response
         * @return Value of the last parsable Content-Length header, or zero if
         *   there was none
         * @throws java.io.IOException if IO error occurs
         */
        private long readContentLength(BufferedReader input)
        throws java.io.IOException
        {
                long contentLength = 0;
                String headerLine;
                while ((headerLine = input.readLine()) != null)
                {
                        if (headerLine.trim().length() == 0) { break; }
                        int colon = headerLine.indexOf(":");
                        if (colon <= 0) { continue; }
                        String type = headerLine.substring(0, colon).trim();
                        if (!type.equalsIgnoreCase("content-length")) { continue; }
                        try
                        { contentLength = Long.parseLong(headerLine.substring(colon + 1).trim()); }
                        catch (NumberFormatException ignored)
                        {
                                /*
                                 * An unparsable length is treated as absent; the
                                 * body is then read until end of stream.
                                 */
                        }
                }
                return contentLength;
        }
        /**
         * Read the body of the response.
         *
         * @param input Reader positioned just after the response headers
         * @param contentLength Declared length of the body; zero or less means
         *   that the length is unknown and the body runs to end of stream
         * @return Body of the response
         * @throws java.io.IOException if IO error occurs
         */
        private String readBody(BufferedReader input, long contentLength)
        throws java.io.IOException
        {
                StringBuilder body = new StringBuilder();
                if (contentLength <= 0L)
                {
                        String bodyLine;
                        while ((bodyLine = input.readLine()) != null)
                        { body.append(bodyLine).append("\r\n"); }
                        return body.toString();
                }
                /*
                 * Content-Length counts bytes while this loop counts decoded
                 * characters.  A character never takes less than one byte in
                 * the usual encodings, so the count cannot run ahead of the
                 * declared length; if it falls short, the loop ends when the
                 * server closes the connection.
                 */
                char buffer[] = new char[1024];
                long charactersRead = 0L;
                while (charactersRead < contentLength)
                {
                        /* Never ask for more than what is left of the declared length. */
                        int wanted = (int) Math.min((long) buffer.length,
                                contentLength - charactersRead);
                        int packetLength = input.read(buffer, 0, wanted);
                        if (packetLength < 0) { break; }
                        charactersRead += (long) packetLength;
                        body.append(buffer, 0, packetLength);
                }
                return body.toString();
        }
} /* End of class httpHelper */
141