001package bradleyross.common; 002import java.io.*; 003import java.net.*; 004import java.io.IOException; 005/** 006 * Provide a set of methods that will help in processing 007 * the contents of web pages. 008 * <p>Some methods need to be added for escaping and unescaping 009 * character strings. For text that appears in web pages, this 010 * would include the handling of ampersands, less than, and greater 011 * than symbols</p> 012 * <p>It may also be necessary to include something for the handling of material 013 * inside quoted strings. (both single and double quotes)</p> 014 * 015 * @author Bradley Ross 016 */ 017public class httpHelper 018{ 019 /** 020 * Controls amount of diagnostic output. 021 * 022 * @see #getDebugLevel() 023 * @see #setDebugLevel(int) 024 */ 025 protected int debugLevel = 0; 026 /** 027 * Getter for debugLevel. 028 * 029 * @return Value of debugLevel 030 * 031 * @see #debugLevel 032 */ 033 public int getDebugLevel() 034 { return debugLevel; } 035 /** 036 * Setter for debugLevel. 037 * @param value Value for debugLevel 038 * @see #debugLevel 039 */ 040 public void setDebugLevel(int value) 041 { debugLevel = value; } 042 /** 043 * Copy the contents of a web page into a String object 044 * <p>It may be advantageous to have these routines able to 045 * time out in the same way as the methods in 046 * {@link bradleyross.library.helpers.FileHelpers}. 047 * The first thing to do would be to use the set timeout 048 * methods for Socket.</p> 049 * @param host Domain name or IP address of web server (Domain name only. 050 * e.g. www.cnn.com) 051 * @param port Port number for web server 052 * @param fileName Directory and name for web page (No leading slash. 053 * e.g. index.html) 054 * @return string containing contents of web page 055 * @throws java.io.IOException if IO error occurs 056 * 057 * @see java.net.Socket 058 */ 059 public String readHttpPage(String host, int port, String fileName) 060 throws java.io.IOException 061 { 062 String newLine; 063 Socket sock; 064 long contentLength = 0; 065 long charactersRead = 0; 066 StringBuffer body = new StringBuffer(); 067 char buffer[] = new char[1024]; 068 try 069 { 070 sock = new Socket(host, port); 071 sock.setSoTimeout(10000); 072 PrintWriter output = new PrintWriter(sock.getOutputStream()); 073 BufferedReader input = 074 new BufferedReader(new InputStreamReader(sock.getInputStream())); 075 String location = "/" + fileName; 076 output.print("GET " + location + " HTTP/1.1 \r\n"); 077 output.print("HOST: " + host); 078 output.print("\r\n"); 079 output.print("\r\n"); 080 output.flush(); 081 /* 082 * Read headers 083 * 084 * The headers have a header name, a colon character, 085 * and then the value for that header. The only 086 * header affecting the treatment of the page is 087 * content-length. 088 */ 089 while ((newLine = input.readLine()) != null) 090 { 091 if (newLine.trim().length() == 0) {break; } 092 if ( newLine.indexOf(":") > 0) 093 { 094 String type = newLine.substring(0, newLine.indexOf(":")).trim(); 095 String value = newLine.substring(newLine.indexOf(":") + 1).trim(); 096 if (type.equalsIgnoreCase("content-length")) 097 { 098 try 099 { contentLength = Long.parseLong(value); } 100 catch (Exception e) { } 101 } 102 } 103 } 104 /* 105 * Read body of transaction response 106 */ 107 if (contentLength <= (long) 0) 108 { 109 while ((newLine = input.readLine()) != null) 110 { body = body.append(newLine + "\r\n"); } 111 } 112 else 113 { 114 StringBuffer buildUp = new StringBuffer(); 115 int packetLength; 116 charactersRead = (long) 0; 117 while ((packetLength = input.read(buffer, 0, 1024)) >= 0) 118 { 119 charactersRead += (long) packetLength; 120 buildUp.append(new String(buffer, 0, packetLength)); 121 if (charactersRead >= contentLength) {break;} 122 } 123 body = buildUp; 124 } 125 sock.close(); 126 if (debugLevel > 0) 127 {System.out.print(" " + Long.toString(charactersRead) + 128 " characters read "); } 129 } /* End of try block */ 130 catch (IOException e) 131 { 132 System.out.println("Exception encountered in readHttpPage"); 133 System.out.println(e.getClass().getName() + " : " + 134 e.getMessage()); 135 e.printStackTrace(System.out); 136 throw new java.io.IOException("Unable to read HTTP page"); 137 } 138 return new String(body); 139 } /* End of method readHttpPage */ 140} /* End of class httpHelper */ 141