package bradleyross.common;
import java.io.*;
import java.net.*;
import java.nio.charset.Charset;
import java.util.Locale;
/**
 * Provide a set of methods that will help in processing
 * the contents of web pages.
 * <p>Some methods still need to be added for escaping and unescaping
 * character strings.  For text that appears in web pages, this
 * would include the handling of ampersands, less-than, and
 * greater-than symbols.</p>
 * <p>It may also be necessary to include something for the handling of
 * material inside quoted strings (both single and double quotes).</p>
 *
 * @author Bradley Ross
 */
public class httpHelper
{
    /** Socket read timeout, in milliseconds. */
    private static final int SOCKET_TIMEOUT_MS = 10000;

    /** Size of the scratch buffer used when copying body bytes. */
    private static final int BUFFER_SIZE = 1024;

    /** Charset used for the request and for response header lines. */
    private static final String HEADER_CHARSET = "ISO-8859-1";

    /**
     * Controls amount of diagnostic output.
     *
     * @see #getDebugLevel()
     * @see #setDebugLevel(int)
     */
    protected int debugLevel = 0;

    /**
     * Getter for debugLevel.
     *
     * @return Value of debugLevel
     *
     * @see #debugLevel
     */
    public int getDebugLevel()
    { return debugLevel; }

    /**
     * Setter for debugLevel.
     *
     * @param value Value for debugLevel
     * @see #debugLevel
     */
    public void setDebugLevel(int value)
    { debugLevel = value; }

    /**
     * Copy the contents of a web page into a String object.
     *
     * <p>A single <code>GET</code> request is sent over a plain socket with
     * <code>Connection: close</code>, and the response body is returned.
     * The body is delimited, in order of precedence, by chunked transfer
     * coding, by the <code>Content-Length</code> header (counted in bytes),
     * or by the server closing the connection.  In the last case each line
     * of the body is returned terminated by <code>\r\n</code>, which is what
     * this method has always done for such responses.</p>
     *
     * <p>The body is decoded with the charset named in the
     * <code>Content-Type</code> header when that charset is supported, and
     * with the platform default charset otherwise.  The status line is not
     * inspected, so error pages are returned like any other page.</p>
     *
     * <p>Socket reads time out after ten seconds.  It may be advantageous
     * to have these routines able to time out in the same way as the
     * methods in {@link bradleyross.library.helpers.FileHelpers}.</p>
     *
     * @param host Domain name or IP address of web server (Domain name only.
     *        e.g. www.cnn.com)
     * @param port Port number for web server
     * @param fileName Directory and name for web page (No leading slash.
     *        e.g. index.html)
     * @return Body of the HTTP response
     * @throws java.io.IOException if the connection cannot be made or the
     *         response cannot be read; the underlying exception is
     *         attached as the cause
     * @see java.net.Socket
     */
    public String readHttpPage(String host, int port, String fileName)
    throws java.io.IOException
    {
        Socket sock = null;
        try
        {
            sock = new Socket(host, port);
            sock.setSoTimeout(SOCKET_TIMEOUT_MS);
            sendRequest(sock.getOutputStream(), host, port, fileName);
            /*
             * The response is read as bytes rather than through a Reader
             * because Content-Length and chunk sizes count bytes, not
             * decoded characters.
             */
            InputStream input = new BufferedInputStream(sock.getInputStream());
            /*
             * Read headers
             *
             * The headers have a header name, a colon character,
             * and then the value for that header.  The status line has
             * no colon in a usable position and is skipped.
             */
            long contentLength = 0;
            boolean chunked = false;
            String contentType = null;
            String headerLine;
            while ((headerLine = readLine(input)) != null)
            {
                if (headerLine.trim().length() == 0) { break; }
                int colon = headerLine.indexOf(':');
                if (colon <= 0) { continue; }
                String name = headerLine.substring(0, colon).trim();
                String value = headerLine.substring(colon + 1).trim();
                if (name.equalsIgnoreCase("content-length"))
                {
                    try
                    { contentLength = Long.parseLong(value); }
                    catch (NumberFormatException ignored)
                    {
                        /*
                         * Deliberate best effort: an unparsable length is
                         * treated as absent and the body is read until the
                         * server closes the connection.
                         */
                    }
                }
                else if (name.equalsIgnoreCase("transfer-encoding"))
                { chunked = value.toLowerCase(Locale.ENGLISH).indexOf("chunked") >= 0; }
                else if (name.equalsIgnoreCase("content-type"))
                { contentType = value; }
            }
            /*
             * Read body of transaction response
             */
            Charset charset = charsetOf(contentType);
            String body;
            if (chunked)
            { body = new String(readChunked(input), charset); }
            else if (contentLength > 0)
            { body = new String(readBytes(input, contentLength), charset); }
            else
            {
                body = normalizeLineEndings(
                        new String(readBytes(input, Long.MAX_VALUE), charset));
            }
            if (debugLevel > 0)
            {
                System.out.print(" " + body.length() + " characters read ");
            }
            return body;
        }
        catch (IOException e)
        {
            System.out.println("Exception encountered in readHttpPage");
            System.out.println(e.getClass().getName() + " : " +
                    e.getMessage());
            e.printStackTrace(System.out);
            throw new java.io.IOException("Unable to read HTTP page", e);
        }
        finally
        {
            /* Release the socket on every path, including failures. */
            if (sock != null)
            {
                try
                { sock.close(); }
                catch (IOException ignored)
                { /* Nothing useful can be done if close itself fails. */ }
            }
        }
    }

    /** Writes the GET request; write failures surface as IOException. */
    private static void sendRequest(OutputStream out, String host, int port,
            String fileName) throws IOException
    {
        /* The Host header carries the port unless it is the HTTP default. */
        String hostHeader = (port == 80) ? host : host + ":" + port;
        String request = "GET /" + fileName + " HTTP/1.1\r\n"
                + "Host: " + hostHeader + "\r\n"
                + "Connection: close\r\n"
                + "\r\n";
        out.write(request.getBytes(HEADER_CHARSET));
        out.flush();
    }

    /** Reads one LF-terminated line (CR stripped); null at end of stream. */
    private static String readLine(InputStream in) throws IOException
    {
        int next = in.read();
        if (next < 0) { return null; }
        ByteArrayOutputStream line = new ByteArrayOutputStream();
        while (next >= 0 && next != '\n')
        {
            line.write(next);
            next = in.read();
        }
        String text = line.toString(HEADER_CHARSET);
        if (text.endsWith("\r"))
        { text = text.substring(0, text.length() - 1); }
        return text;
    }

    /** Reads up to limit bytes, stopping early at end of stream. */
    private static byte[] readBytes(InputStream in, long limit)
    throws IOException
    {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[BUFFER_SIZE];
        long remaining = limit;
        while (remaining > 0)
        {
            int wanted = (int) Math.min((long) buffer.length, remaining);
            int received = in.read(buffer, 0, wanted);
            if (received < 0) { break; }
            collected.write(buffer, 0, received);
            remaining -= received;
        }
        return collected.toByteArray();
    }

    /** Decodes a chunked body; trailers after the last chunk are ignored. */
    private static byte[] readChunked(InputStream in) throws IOException
    {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        String sizeLine;
        while ((sizeLine = readLine(in)) != null)
        {
            /* Chunk extensions, if any, follow a semicolon. */
            int semicolon = sizeLine.indexOf(';');
            String hex = (semicolon >= 0)
                    ? sizeLine.substring(0, semicolon) : sizeLine;
            long size;
            try
            { size = Long.parseLong(hex.trim(), 16); }
            catch (NumberFormatException e)
            { throw new IOException("Malformed chunk size: " + sizeLine); }
            if (size <= 0) { break; }
            byte[] chunk = readBytes(in, size);
            collected.write(chunk, 0, chunk.length);
            /* Consume the CRLF that follows the chunk data. */
            readLine(in);
        }
        return collected.toByteArray();
    }

    /** Picks the charset from a Content-Type value, else platform default. */
    private static Charset charsetOf(String contentType)
    {
        if (contentType != null)
        {
            String[] parts = contentType.split(";");
            for (int i = 1; i < parts.length; i++)
            {
                String parameter = parts[i].trim();
                if (!parameter.regionMatches(true, 0, "charset=", 0, 8))
                { continue; }
                String name = parameter.substring(8).trim();
                if (name.length() >= 2 && name.startsWith("\"")
                        && name.endsWith("\""))
                { name = name.substring(1, name.length() - 1); }
                try
                {
                    if (Charset.isSupported(name))
                    { return Charset.forName(name); }
                }
                catch (IllegalArgumentException ignored)
                { /* Malformed charset name: fall back to the default. */ }
            }
        }
        return Charset.defaultCharset();
    }

    /** Re-terminates every line of text with CRLF. */
    private static String normalizeLineEndings(String text) throws IOException
    {
        StringBuilder result = new StringBuilder(text.length() + 16);
        BufferedReader lines = new BufferedReader(new StringReader(text));
        String line;
        while ((line = lines.readLine()) != null)
        { result.append(line).append("\r\n"); }
        return result.toString();
    }
} /* End of class httpHelper */