package net.sourceforge.livepinger.extracter;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import net.sourceforge.livepinger.dto.Live;
import net.sourceforge.livepinger.dto.LivetubeLive;

/**
 * Extracter that scans an HTML page for anchor tags describing live
 * broadcasts and turns each matching anchor into a {@link LivetubeLive}.
 */
public class LivetubeExtracter extends ExtracterBase {
	private static final Log log = LogFactory.getLog(LivetubeExtracter.class);
	
	/** URL of the page to crawl. */
	static String CRAWL_URL = "http://h.livetube.cc/";
	
	/**
	 * Matches a whole anchor tag on a single line. Group 1 is the part of the
	 * href before a slash and group 2 the part after it; an optional leading
	 * slash of the href is not captured.
	 */
	Pattern anchorPattern = Pattern.compile("<a href=\"/?([^\"]+)/([^\"]+)\">.*?</a>"); //$NON-NLS-1$

	/**
	 * Matches a bold element followed by a parenthesised counter block of the
	 * form {@code (digits:digits,digitsR)}. Only used as a filter; its groups
	 * are not read.
	 * <p>
	 * NOTE(review): the literal {@code R} before the closing parenthesis may be
	 * the residue of a multi-byte character lost in an encoding conversion -
	 * verify against the actual page markup before changing it.
	 */
	Pattern livePattern = Pattern.compile("<b>.*?</b>.*?\\(([0-9]+):([0-9]+),([0-9]+)R\\)"); //$NON-NLS-1$

	/**
	 * Extracts the lives found in the given page content.
	 * <p>
	 * Every anchor matched by {@link #anchorPattern} whose text also matches
	 * {@link #livePattern} yields one {@link LivetubeLive} flagged as on air.
	 * Anchors whose href segments cannot be URL-decoded are logged and skipped
	 * so that one malformed link does not discard the rest of the page.
	 *
	 * @param data the page content to scan
	 * @return the extracted lives in document order; empty when nothing matches
	 */
	@Override
	public List<Live> extract(String data) {
		List<Live> lives = new ArrayList<Live>();
		Matcher anchorMatcher = anchorPattern.matcher(data);

		// Walk over every anchor tag in the page.
		while (anchorMatcher.find()) {
			
			// Skip anchors that do not carry the counter block of a live entry.
			Matcher distMatcher = livePattern.matcher(anchorMatcher.group(0));
			if (!distMatcher.find()) continue;

			String rawDistName = anchorMatcher.group(1);
			String rawLiveName = anchorMatcher.group(2);

			String distName;
			String liveName;
			try {
				distName = URLDecoder.decode(rawDistName, "UTF-8");
				liveName = URLDecoder.decode(rawLiveName, "UTF-8");
			} catch (UnsupportedEncodingException e) {
				log.error("[extract] UTF-8 decoding is not supported", e);
				continue;
			} catch (IllegalArgumentException e) {
				// URLDecoder rejects malformed percent-escapes with an unchecked
				// exception; drop only this anchor instead of the whole page.
				log.warn("[extract] skipping anchor with malformed URL encoding: "
						+ rawDistName + "/" + rawLiveName, e);
				continue;
			}

			log.info("[extract] distributor: " + distName);
			log.info("[extract] live: " + liveName);

			// The URL keeps the segments exactly as they appear in the href
			// (still encoded); only the names are decoded.
			Live live = new LivetubeLive(LivetubeLive.BASE_URL + rawDistName + "/" + rawLiveName, 
					distName, liveName);
			live.setOnair(true);
			lives.add(live);
		}
		
		return lives;
	}

	/**
	 * Returns the URL of the page this extracter is meant to crawl.
	 *
	 * @return the current value of {@link #CRAWL_URL}
	 */
	@Override
	public String getCrawlUrl() {
		return CRAWL_URL;
	}

}
