/*
 * Copyright (C) 2010 Olivier PARISOT <parisot_olivier@yahoo.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */package org.doopyon.ravanelab.util;

import java.io.*;
import java.util.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.HTML.*;
import au.com.bytecode.opencsv.*;


/**
 * Callback used to parse an html page: extract the content of the HTML tables as rows of fields,
 * i.e. the layout used to read or write CSV data.
 * <p>
 * Each <code>tr</code> element gives one row, each <code>td</code> or <code>th</code> element gives
 * one field of this row. The rows of all the tables of the page are accumulated in document order.
 * Nested tables are not tracked: there is no nesting counter, rows are simply recorded as their
 * end tags are seen.
 * <p>
 * The text of a field is stored as it is received: separators, quotes, backslashes and line breaks
 * found in a cell stay inside the field and never create additional fields or rows.
 * 
 * @author Olivier PARISOT
 */
public final class HTMLTableToCSVCallback extends HTMLEditorKit.ParserCallback
{
	//
	// Instance fields
	//
	
	/** Rows already completed, in document order. */
	private final List<String[]> rows;
	/** Fields already completed for the row being read. */
	private final List<String> currentRow;
	/** Text received since the last completed field. */
	private final StringBuilder cell;
	/** True once a table, row or cell start tag has been seen, until the next table end tag. */
	private boolean inTable;
	
	
	//
	// Constructor
	//
	
	/**
	 * Constructor: creates a callback with no recorded row.
	 */
	public HTMLTableToCSVCallback()
	{
		this.rows=new ArrayList<String[]>();
		this.currentRow=new ArrayList<String>();
		this.cell=new StringBuilder();
		this.inTable=false;
	}
	
	
	//
	// Instance methods
	//
	
	/** 
	 * Closes the current field on a cell end tag, the current row on a row end tag, and stops
	 * the recording of text on a table end tag. Other tags are ignored.
	 * 
	 * @param t the tag being closed
	 * @param pos position in the document (unused)
	 */
	@Override
	public void handleEndTag(Tag t, int pos) 
	{
		if (t.equals(Tag.TD)||t.equals(Tag.TH))
		{
			finishCell();
		}
		else if (t.equals(Tag.TR))
		{
			finishRow();
		}
		else if (t.equals(Tag.TABLE))
		{
			// a row whose end tag was never reported must not leak into the next table
			if (hasPendingContent()) finishRow();
			inTable=false;
		}
	}

	/** 
	 * Starts the recording of text when a table, a row or a cell (data or header) is opened.
	 * 
	 * @param t the tag being opened
	 * @param a attributes of the tag (unused)
	 * @param pos position in the document (unused)
	 */
	@Override
	public void handleStartTag(Tag t, MutableAttributeSet a, int pos) 
	{
		if (t.equals(Tag.TABLE)||t.equals(Tag.TR)||t.equals(Tag.TD)||t.equals(Tag.TH)) inTable=true;
	}

	/** 
	 * Appends the text to the field being read; text received while no table is being
	 * recorded is ignored. Successive chunks received for the same field are concatenated.
	 * 
	 * @param data the text
	 * @param pos position in the document (unused)
	 */
	@Override
	public void handleText(char[] data, int pos) 
	{
		if (inTable) cell.append(data);
	}
	
	/**
	 * Returns the rows recorded so far.
	 * <p>
	 * A row or a field which is still open (no end tag received yet) is included as it stands.
	 * A row without any cell is returned as a zero-length array.
	 * 
	 * @return a new list (never null) holding a copy of each row, in document order
	 */
	public List<String[]> getTable()
	{
		final List<String[]> r=new ArrayList<String[]>(rows.size()+1);
		for (final String[] row:rows) r.add(row.clone());
		if (hasPendingContent())
		{
			// snapshot of the unfinished row: the internal state is left untouched
			final List<String> pending=new ArrayList<String>(currentRow);
			if (cell.length()>0) pending.add(cell.toString());
			r.add(pending.toArray(new String[pending.size()]));
		}
		return r;
	}
	
	/**
	 * Stores the buffered text as the next field of the current row and resets the buffer.
	 */
	private void finishCell()
	{
		currentRow.add(cell.toString());
		cell.setLength(0);
	}
	
	/**
	 * Stores the current row (possibly empty) and prepares the next one.
	 */
	private void finishRow()
	{
		// text received after the last closed cell is kept as a trailing field instead of being lost
		if (cell.length()>0) finishCell();
		rows.add(currentRow.toArray(new String[currentRow.size()]));
		currentRow.clear();
	}
	
	/**
	 * @return true if some field or some text has been received since the last completed row
	 */
	private boolean hasPendingContent()
	{
		return !currentRow.isEmpty()||cell.length()>0;
	}
}
