import java.net.*;
import java.io.*;
import java.util.*;

/** Parser provides a method that fetches the document for a given URL.

 * This class is complete and ready to use as-is.
 */
public class Parser {

  /**
   * Fetches the page at the given URL and parses it into a document.
   * The page is assumed to be an "HTML0" document (term kept from the
   * original comment).
   *
   * <p>Parsing uses the static fields {@code nesting} and
   * {@code titleBuffer} as scratch state, so concurrent calls are not
   * supported.
   *
   * @param url the URL that is the address of the wanted web page
   * @return a Document object representing the page; when the page cannot
   *         be opened, a Document with a null body and the title " "
   */
  public static  Document parseDocument (URL url) {
    TokenSequence tokens = new TokenSequence();
    DataInputStream stream = null;
    try {
      stream = new DataInputStream(url.openConnection().getInputStream());
      tokens.tokenize(stream);
    } catch (IOException x) {
      return (new Document(null," "));
    } finally {
      // Always release the connection, also when reading fails half-way.
      if (stream!=null) {
        try {
          stream.close();
        } catch (IOException ignored) {
          // All tokens have already been read; a failing close must not
          // throw away a page that was fetched successfully.
        }
      }
    }
    // Start from a clean slate: an earlier parse that was aborted by a
    // runtime exception may have left the list depth above zero.
    nesting = 0;
    titleBuffer = new StringBuffer();
    // The body is parsed first; that is what fills titleBuffer.
    return (new Document(getHTMLSTAR(url,tokens,null),titleBuffer.toString()));
  }
  
  // Current depth of <ul> lists being parsed: getHTML increments it before
  // parsing the items of a list and decrements it afterwards. A value above
  // zero makes a <li> start tag end the element sequence being parsed.
  private static int nesting = 0;
  
  // Collects the title text of the document being parsed; replaced by a
  // fresh buffer in parseDocument and appended to by getTitle.
  private static StringBuffer titleBuffer;
  
  /**
   * Appends the text of the title element to {@code titleBuffer}.
   *
   * Tokens are consumed from the front of the sequence until the
   * {@code </title>} end tag is at the front (it is left there for the
   * caller to drop) or until the sequence is empty. Whitespace tokens are
   * added as a single blank, word tokens as their text, and any other tags
   * inside the title are skipped.
   *
   * This is a loop rather than a recursion per token, so an unterminated
   * title cannot exhaust the call stack on a long document.
   *
   * @param tokens the token sequence, positioned at or inside the title
   */
  private static void getTitle(TokenSequence tokens) {
    while (!tokens.empty()) {
      if (tokens.space()) titleBuffer.append(" ");
      if (tokens.word()!=null) titleBuffer.append(tokens.word());
      if (tokens.tag()!=null && !tokens.tag().start &&
          tokens.tag().name.equals("title")) return;
      tokens.cut();
    }
  }
  
  /**
   * Parses the list items found at the front of the token sequence.
   *
   * While the front token is an {@code <li>} start tag, that tag is dropped,
   * the item's content is parsed up to the next item or the end of the
   * list, and the remaining items are parsed recursively.
   *
   * @param base   the URL passed on for parsing the item content
   * @param tokens the token sequence; consumed as items are read
   * @return the parsed items as a chain, or null when the front token is
   *         not an {@code <li>} start tag (or no tokens are left)
   */
  private static Document.ITEMSTAR getITEMSTAR(URL base, TokenSequence tokens) {
    boolean atItem = !tokens.empty()
        && tokens.tag()!=null
        && tokens.tag().start
        && tokens.tag().name.equals("li");
    if (!atItem) {
      return null;
    }
    // Content first, then the depth: same evaluation order as before.
    Document.HTMLSTAR content = getHTMLSTAR(base,tokens.cut(),"ul");
    Document.ITEM first = new Document.ITEM(content,nesting);
    Document.ITEMSTAR others = getITEMSTAR(base,tokens);
    return new Document.ITEMSTAR(first,others);
  }
  
  /**
   * Parses a run of consecutive elements into a chain of HTMLSTAR cells.
   *
   * Parsing stops, leaving the stopping token at the front of the sequence,
   * when an end tag named {@code stop} is met, or when an {@code <li>} start
   * tag is met while inside a list ({@code nesting > 0}). It also stops when
   * the tokens run out. Tokens that produce no element (end tags, unknown
   * tags) are skipped.
   *
   * Nested parsing may consume the last token, for example when an element
   * is never closed. The sequence is therefore re-checked before it is
   * inspected or advanced, instead of failing on an empty sequence.
   *
   * @param base   the URL passed on to getHTML for resolving links
   * @param tokens the token sequence; consumed as elements are read
   * @param stop   name of the end tag that terminates the run, or null
   * @return the first cell of the chain, or null when no element was parsed
   */
  private static Document.HTMLSTAR getHTMLSTAR
  (URL base, TokenSequence tokens, String stop) {
    Document.HTMLSTAR htmls = null;
    Document.HTMLSTAR tail = null;
    Document.HTML html = null;
    boolean done = false;
    while (!done && !tokens.empty()) {
      html = getHTML(base,tokens,stop);
      if (html!=null) {
        Document.HTMLSTAR cell = new Document.HTMLSTAR(html,null);
        if (htmls==null) {
          htmls = cell;
        } else {
          tail.rest = cell;
        }
        tail = cell;
      } else if (!tokens.empty() && tokens.tag()!=null) {
        boolean closesRun = !tokens.tag().start && stop!=null
            && stop.equals(tokens.tag().name);
        boolean nextItem = tokens.tag().start
            && tokens.tag().name.equals("li") && nesting>0;
        if (closesRun || nextItem) {
          done = true;
        }
      }
      // Drop the token just handled. After a nested element this is its
      // terminating token, which may be missing at the end of the input.
      if (!done && !tokens.empty()) tokens.cut();
    }
    return htmls;
  }
  
  /**
   * Builds the document element for the token at the front of the sequence.
   *
   * Whitespace and word tokens become HTMLspace and HTMLword. Start tags
   * are handled by name:
   * {@code br}, {@code p}, {@code hr} and {@code img} become single
   * elements; {@code b}, {@code i}, {@code h1}-{@code h3} and {@code a}
   * become elements whose content is parsed up to the matching end tag;
   * {@code ul} becomes a list of its items; {@code title} is collected into
   * the title buffer and produces no element. End tags, unknown tags and an
   * {@code <li>} met inside a list produce no element.
   *
   * For elements with content the start tag is dropped and the content
   * consumed; the token that ended the content is left at the front.
   * Otherwise the front token is left in place for the caller to drop.
   *
   * @param base   the URL against which href and src values are resolved
   * @param tokens the token sequence; must not be empty
   * @param stop   not used by this method
   * @return the element, or null when the front token produces none
   */
  private static Document.HTML getHTML
  (URL base, TokenSequence tokens, String stop) {
    if (tokens.space()) {
      return new Document.HTMLspace();
    }
    if (tokens.word()!=null) {
      return new Document.HTMLword(tokens.word());
    }
    // A token without a tag carries nothing to build; treat it like an
    // end tag instead of dereferencing null.
    if (tokens.tag()==null || !tokens.tag().start) {
      return null;
    }

    String name = tokens.tag().name;
    if (name.equals("title")) {
      getTitle(tokens);
      return null;
    }
    if (name.equals("li") && nesting>0) {
      return null;
    }
    if (name.equals("br")) {
      return new Document.HTMLbreak();
    }
    if (name.equals("p")) {
      return new Document.HTMLparagraph();
    }
    if (name.equals("hr")) {
      return new Document.HTMLrule();
    }
    if (name.equals("b")) {
      return new Document.HTMLbold(getHTMLSTAR(base,tokens.cut(),"b"));
    }
    if (name.equals("i")) {
      return new Document.HTMLitalics(getHTMLSTAR(base,tokens.cut(),"i"));
    }
    if (name.equals("h1") || name.equals("h2") || name.equals("h3")) {
      // All three header levels map to the same element; the content ends
      // at the end tag with the same name as the start tag.
      return new Document.HTMLheader(getHTMLSTAR(base,tokens.cut(),name));
    }
    if (name.equals("a")) {
      // Read href before the start tag is dropped from the sequence.
      URL target = resolveLink(base,tokens.tag().args.lookup("href"));
      return new Document.HTMLanchor(target,getHTMLSTAR(base,tokens.cut(),"a"));
    }
    if (name.equals("img")) {
      return new Document.HTMLimage(
          resolveLink(base,tokens.tag().args.lookup("src")));
    }
    if (name.equals("ul")) {
      nesting++;
      try {
        return new Document.HTMLlist(getITEMSTAR(base,tokens.cut()));
      } finally {
        // Restore the depth even if parsing the items fails, so one bad
        // list cannot distort later parsing.
        nesting--;
      }
    }
    return null;
  }

  /**
   * Resolves an attribute value against the base URL.
   *
   * @return the resolved URL, or null when the value is missing or is not
   *         a valid URL
   */
  private static URL resolveLink(URL base, String spec) {
    try {
      return new URL(base,spec);
    } catch (MalformedURLException x) {
      return null;
    }
  }
  
  /**
   * A queue of HTML tokens (whitespace, words and tags) read from a byte
   * stream.
   *
   * tokenize fills the queue. Callers inspect the front token with
   * space(), word() and tag() and drop it with cut(). Every input byte is
   * treated as one character with a value from 0 to 255.
   *
   * End of input is an ordinary condition here, not an exception: a word,
   * entity or complete tag that runs up to the end of the input is kept,
   * while a tag that is cut off before its closing bracket is dropped.
   * A genuine I/O failure ends tokenizing with the tokens read so far.
   */
  private static class TokenSequence {
    /** Value of c once the input is exhausted; no input byte maps to it. */
    private static final char END = (char) 0xFFFF;

    Vector v;   // tokens not yet consumed, oldest first
    char c;     // one character of lookahead
    Token t;    // token under construction; null means "emit nothing"
    private boolean atEnd;   // true once the stream has reported end of input

    /** Creates an empty token sequence. */
    public TokenSequence() {
      v = new Vector(0);
    }

    /**
     * Drops the front token. The sequence must not be empty.
     * @return this sequence, to allow chaining
     */
    public TokenSequence cut() {
      v.removeElementAt(0);
      return this;
    }

    /** Returns true when no tokens are left. */
    public boolean empty() {
      return v.isEmpty();
    }

    /** Returns true when the front token is whitespace. */
    public boolean space() {
      return ((Token)v.elementAt(0)).space;
    }

    /** Returns the text of the front token, or null if it is not a word. */
    public String word() {
      return ((Token)v.elementAt(0)).word;
    }

    /** Returns the tag of the front token, or null if it is not a tag. */
    public Tag tag() {
      return ((Token)v.elementAt(0)).tag;
    }

    /**
     * Reads the next character into c. At end of input c becomes END and
     * atEnd is set; further calls then do nothing.
     */
    private void get(DataInputStream stream) throws IOException {
      if (atEnd) return;
      int b = stream.read();
      if (b<0) {
        atEnd = true;
        c = END;
      } else {
        c = (char)b;
      }
    }

    /** Advances past blanks, tabs and line breaks. */
    private void skipBlanks(DataInputStream stream) throws IOException {
      while (c==' ' || c == '\t' || c == '\n' || c == '\r') get(stream);
    }

    /**
     * Reads a word: everything up to whitespace, a tag, an entity or the
     * end of the input.
     */
    private void readWord(DataInputStream stream) throws IOException {
      StringBuffer b = new StringBuffer();
      while (!atEnd && c!=' ' && c!= '\t' && c!='\n' && c!='<' && c!='&'
             && c!='\r') {
        b.append(c);
        get(stream);
      }
      t.word = b.toString();
    }

    /**
     * Reads a tag name, an attribute name or an attribute value.
     * A value in single or double quotes is read up to the matching quote
     * (or the end of the input) and returned without the quotes. Anything
     * else is read up to whitespace, an equals sign or the end of the tag.
     */
    private String readName(DataInputStream stream) throws IOException {
      StringBuffer b = new StringBuffer();
      if (c=='"' || c=='\'') {
        char quote = c;
        get(stream);
        while (!atEnd && c!=quote) {
          b.append(c);
          get(stream);
        }
        get(stream);   // step past the closing quote
      } else {
        while (!atEnd && c!=' ' && c!= '\t' && c!='\n' && c!='>' && c!='='
               && c!='\r') {
          b.append(c);
          get(stream);
        }
      }
      return b.toString();
    }

    /**
     * Reads a tag; the opening bracket has already been consumed.
     * Tag and attribute names are lower-cased independently of the default
     * locale. Whitespace after a start tag is skipped. If the input ends
     * before the closing bracket, no token is produced (t becomes null).
     */
    private void readTag(DataInputStream stream) throws IOException {
      String left,right;
      t.tag = new Tag();
      skipBlanks(stream);
      if (c=='/') {
        t.tag.start = false;
        get(stream);
      }
      skipBlanks(stream);
      t.tag.name = readName(stream).toLowerCase(Locale.ENGLISH);
      skipBlanks(stream);
      while (!atEnd && c!='>') {
        left = readName(stream).toLowerCase(Locale.ENGLISH);
        skipBlanks(stream);
        if (c=='=') {
          get(stream);
          skipBlanks(stream);
          right = readName(stream);
        } else {
          right = null;
        }
        t.tag.args.add(left,right);
        skipBlanks(stream);
      }
      if (atEnd) {
        t = null;   // the tag was cut off before its closing bracket
        return;
      }
      get(stream);
      if (t.tag.start) skipBlanks(stream);
    }

    /**
     * Reads a character reference; c holds the ampersand on entry.
     * A reference is an optional hash sign followed by letters and digits.
     * When a semicolon follows, it is consumed and the reference decoded.
     * Otherwise the characters read so far are emitted as literal text and
     * the character that ended them is left for the next token, so a stray
     * ampersand never swallows the text that follows it.
     */
    private void readSpecial(DataInputStream stream) throws IOException {
      StringBuffer b = new StringBuffer();
      String s;
      boolean numeric;
      get(stream);
      numeric = (c=='#');
      if (numeric) get(stream);
      while (!atEnd && Character.isLetterOrDigit(c)) {
        b.append(c);
        get(stream);
      }
      s = b.toString();
      if (c!=';') {
        t.word = (numeric ? "&#" : "&") + s;
        return;
      }
      get(stream);
      t.word = numeric ? decodeNumeric(s) : decodeNamed(s);
    }

    /**
     * Decodes the digits of a numeric reference, decimal or (with a
     * leading x) hexadecimal. Returns the reference as literal text when
     * it is not a number or does not fit in one char.
     */
    private String decodeNumeric(String s) {
      try {
        int code;
        if (s.startsWith("x") || s.startsWith("X")) {
          code = Integer.parseInt(s.substring(1),16);
        } else {
          code = Integer.parseInt(s);
        }
        if (code>=0 && code<=0xFFFF) return String.valueOf((char)code);
      } catch (NumberFormatException x) {
        // not a number: fall through and keep the text as written
      }
      return "&#" + s +";";
    }

    /**
     * Decodes a named reference; unknown names are returned as literal
     * text.
     */
    private String decodeNamed(String s) {
      if (s.equals("amp")) return "&";
      if (s.equals("gt")) return ">";
      if (s.equals("lt")) return "<";
      if (s.equals("quot")) return "\"";
      if (s.equals("nbsp")) return " ";
      if (s.equals("aring")) return "å";
      if (s.equals("Aring")) return "Å";
      if (s.equals("oslash")) return "ø";
      if (s.equals("Oslash")) return "Ø";
      if (s.equals("aelig")) return "æ";
      if (s.equals("AElig")) return "Æ";
      return "&" + s +";";
    }

    /**
     * Skips a declaration or comment; c holds the exclamation mark on
     * entry. No token is produced (t becomes null).
     *
     * The closing bracket ends the construct only when an even number of
     * double-dash pairs has been seen, so a bracket inside a comment does
     * not end it.
     */
    public void skipMeta(DataInputStream stream) throws IOException {
      boolean zero = true;   // no single dash is waiting for its partner
      boolean even = true;   // an even number of dash pairs seen so far
      t = null;
      while (true) {
        get(stream);
        if (atEnd) return;
        if (c=='-') {
          if (zero) zero=false;
          else {
            zero = true;
            even = !even;
          }
        } else if (c=='>') {
          if (even) {
            get(stream);
            return;
          } else zero = true;
        } else zero = true;
      }
    }

    /**
     * Reads the whole stream and appends its tokens to this sequence.
     * A run of whitespace becomes one space token, except that whitespace
     * at the very end of the input produces no token. An I/O failure ends
     * tokenizing silently; the tokens read so far are kept.
     */
    public void tokenize(DataInputStream stream) {
      atEnd = false;
      try {
        get(stream);
        while (!atEnd) {
          t = new Token();
          switch (c) {
          case '\r':
          case ' ':
          case '\t':
          case '\n':
            skipBlanks(stream);
            if (atEnd) t = null;
            else t.space = true;
            break;
          case '<':
            get(stream);
            if (c=='!') skipMeta(stream);
            else readTag(stream);
            break;
          case '&':
            readSpecial(stream);
            break;
          default:
            readWord(stream);
            break;
          }
          if (t!=null) v.addElement(t);
        }
      } catch (IOException x) {
        return;
      }
    }

    /** One token: exactly one of word, tag and space is set by the reader. */
    private static class Token {
      public String word;
      public Tag tag;
      public boolean space;

      public Token() {
        word = null;
        tag = null;
        space = false;
      }
    }

    /** A tag: lower-case name, start/end marker and attributes. */
    private static class Tag{
      public String name;
      public boolean start;   // true for a start tag, false for an end tag
      public Arguments args;

      public Tag() {
        this.name = null;
        this.start = true;
        this.args = new Arguments();
      }
    }

    /**
     * The attributes of a tag as a linked list of name/value pairs.
     * The pair added last is at the head, so for a repeated name lookup
     * returns the value added last.
     */
    private static class Arguments {
      String left;      // attribute name
      String right;     // attribute value, or null when none was given
      Arguments next;
      boolean empty;    // true while no pair has been added

      public Arguments() {
        empty = true;
        next = null;
      }

      public Arguments(String left, String right, Arguments next) {
        this.left = left;
        this.right = right;
        this.next = next;
        this.empty = false;
      }

      /** Adds a pair at the head, pushing the current head down the list. */
      public void add(String left, String right) {
        if (!empty) this.next = new Arguments(this.left,this.right,this.next);
        this.left = left;
        this.right = right;
        this.empty = false;
      }

      /**
       * Returns the value stored for the given name, or null when the
       * name is absent or was given without a value.
       */
      public String lookup(String left) {
        if (empty) return null;
        if (left.equals(this.left)) return this.right;
        if (next==null) return null;
        return next.lookup(left);
      }
    }
  }
}
