// ==UserScript==
// @name           SITEINFO speculator for AutoPagerize
// @namespace      http://ido.nu/kuma/
// @include        *
// ==/UserScript==
//
// Version string of this userscript. It is not read anywhere in the visible source.
var  VERSION = '0.0.2';

// Shorthand for console.log. The only reference in the visible source is a
// commented-out call inside DOMDiff.get_neat_nodePath.
var log = console.log;

/**
 * Evaluates the string `name` with eval() and returns the result.
 *
 * window.AutoPagerize.addDocumentFilter is handed to eval() as a second
 * argument. NOTE(review): standard eval() ignores extra arguments; this
 * presumably relies on the legacy SpiderMonkey eval(code, scopeObject)
 * extension so that names are resolved inside AutoPagerize's own scope --
 * confirm against the browser this script targets.
 *
 * Callers in this file pass plain identifiers ("ap", "AutoPager", "COLOR")
 * as well as an assignment ("COLOR.on = '#080'"). The string is executed as
 * code, so it must never be built from page content.
 *
 * @param {string} name  Expression to evaluate.
 * @returns {*} Whatever the evaluated expression yields.
 */
function getAPRize(name) {
	return eval(name, window.AutoPagerize.addDocumentFilter);
}
/**
 * Resolves a function through getAPRize() and calls it.
 *
 * The resolved function is also used as its own `this` value.
 *
 * @param {string} name  Expression passed to getAPRize() to obtain the function.
 * @param {Array}  args  Argument list forwarded to the function.
 * @returns {*} The called function's return value.
 */
function callAPRize(name, args) {
	var target = getAPRize(name);
	var result = target.apply(target, args);
	return result;
}


// XPath helper borrowed from the Minibuffer userscript; window.Minibuffer
// must already exist when this line runs.
var $X = window.Minibuffer.$X;

// Keywords searched for in link text (findNextLink) and in <img> attributes
// (findImageNextLinks), in this order.
// NOTE(review): the list also contains "previous"-style markers ("Prev", "<",
// U+00AB, ...); presumably deliberate for sites where "previous" leads to
// older content -- confirm.
var nextLinkTexts = [
	"\u00BB", "次", "Older", "older", "Next", "next", "→", "←", ">", "<",
	"\u00AB", "前", "Prev", "prev", 
];

// Candidate chosen by run(); read later by analyzeNextpage().
var nextLink;
// Not assigned in the visible source: analyzeNextpage() declares its own
// local variable of the same name.
var siteinfo;

// Diagnostic notes collected while speculating; sent along with a rating.
var message = [];

// Updated by the patched AutoPager.prototype.showLoading installed in run()
// and read by the patched terminate().
var isLoading = true;

// Script entry point. The functions below are declarations, so they are
// hoisted and may be called from here.
run();


/**
 * Searches the current document for a text link that looks like a
 * "next page" link.
 *
 * For every keyword in nextLinkTexts the last <a> whose text contains the
 * keyword becomes a candidate. A candidate's score is the inverse of the
 * link's text length, so short links such as ">" beat long sentences that
 * merely contain the keyword.
 *
 * @returns {{node: Element, url: string, predicates: string, score: number}|undefined}
 *          The best-scoring candidate, or undefined when nothing matched.
 */
function findNextLink() {
	// U+00AB is the left guillemet (laquo), U+00BB the right one (raquo).
	var candidates = [];

	for ( var i = 0; i < nextLinkTexts.length; i++ ) {
		var keyword = nextLinkTexts[i];
		var predicates = 'contains(text(),"' + keyword + '")';
		var xpath = '//a[' + predicates + ']/@href';
		var hrefs = $X(xpath, window.document);
		if ( !hrefs || hrefs.length == 0 )
			continue;

		// Only the last matching link in document order is considered.
		var node = hrefs.pop();
		var nextPageUrl = node.nodeValue.toString();
		// Relative hrefs are resolved by AutoPagerize's own pathToURL helper.
		var u = nextPageUrl.match( /^https?:\/\// ) ?
			nextPageUrl :
			callAPRize('pathToURL', [document.location.href, nextPageUrl]);

		candidates.push( {
			node: node.ownerElement,
			url:  u,
			// The predicate is kept for every keyword, non-ASCII ones included.
			// The previous code contained a branch meant to blank it for
			// non-ASCII keywords, but that branch could never take effect.
			predicates: predicates,
			score: 1 / node.ownerElement.textContent.length
		} );
	}

	// Highest score first. The comparator must return a number; returning a
	// boolean (as before) leaves the array unsorted in current engines.
	candidates.sort( function (a, b) {
		if ( a.score === b.score )
			return 0;
		return ( a.score < b.score ) ? 1 : -1;
	} );
	return candidates.shift();
}

/**
 * Searches the current document for an image link that looks like a
 * "next page" link.
 *
 * For every keyword in nextLinkTexts and each of the <img> attributes
 * alt, title and src, the last <a> with a child <img> whose attribute
 * contains the keyword becomes a candidate. Scoring is the inverse of the
 * link's text length, as in findNextLink().
 *
 * @returns {{node: Element, url: string, predicates: string, score: number}|undefined}
 *          The best-scoring candidate, or undefined when nothing matched.
 */
function findImageNextLinks() {
	var candidates = [];
	var attributes = [ "alt", "title", "src" ];

	nextLinkTexts.forEach( function (keyword) {
		attributes.forEach( function (attr) {
			// The keyword is never modified here. Previously a match on a
			// non-ASCII keyword blanked it for the remaining attributes, which
			// produced contains(@title,"") -- a test that is true for every
			// image link on the page.
			var predicates = './img[contains(@' + attr + ',"' + keyword + '")]';
			var xpath = '//a[ ' + predicates + ']/@href';
			var hrefs = $X(xpath, window.document);
			if ( !hrefs || hrefs.length == 0 )
				return;

			// Only the last matching link in document order is considered.
			var node = hrefs.pop();
			var nextPageUrl = node.nodeValue.toString();
			// Relative hrefs are resolved by AutoPagerize's own pathToURL helper.
			var u = nextPageUrl.match( /^https?:\/\// ) ?
				nextPageUrl :
				callAPRize('pathToURL', [document.location.href, nextPageUrl]);

			candidates.push( {
				node: node.ownerElement,
				url:  u,
				predicates: predicates,
				score: 1 / node.ownerElement.textContent.length
			} );
		} );
	} );

	// Highest score first. The comparator must return a number; returning a
	// boolean (as before) leaves the array unsorted in current engines.
	candidates.sort( function (a, b) {
		if ( a.score === b.score )
			return 0;
		return ( a.score < b.score ) ? 1 : -1;
	} );
	return candidates.shift();
}

/**
 * Script entry point.
 *
 * Does nothing inside frames. When getAPRize("ap") is null (no pager
 * instance is present), it:
 *   1. picks a next-page link (text links first, image links as fallback)
 *      and stores it in the shared `nextLink` variable,
 *   2. sets AutoPagerize's COLOR.on to '#080',
 *   3. fetches the next page and hands the response to analyzeNextpage(),
 *   4. replaces AutoPager.prototype.terminate / addPage / showLoading with
 *      versions that record diagnostics and show the page count in the icon.
 * Otherwise it only logs the existing pager instance.
 */
function run () {
	if ( window != window.top )
		return;

	if ( getAPRize("ap") != null ) {
		console.log( "ap exists.",  getAPRize("ap"));
		return;
	}

	nextLink = findNextLink();
	if ( !nextLink ) {
		nextLink = findImageNextLinks();
	}

	getAPRize("COLOR.on = '#080'");

	if ( nextLink ) {
		GM_xmlhttpRequest( {
			method: 'get',
			url: nextLink.url,
			onload: analyzeNextpage,
			// Decode the response with the same charset as the current page.
			overrideMimeType: 'text/html; charset=' + document.characterSet
		} );
	}

	var a = getAPRize("AutoPager");
	var COLOR = getAPRize("COLOR");

	// Records why paging stopped, then marks the icon as terminated.
	a.prototype.terminate = function () {
		if ( isLoading ) {
			message.push(  "pageElement empty / already loaded." );
		} else {
			message.push(  "nextLink url empty." );
		}

		this.icon.style.background = COLOR['terminated'];
	};

	// Shows the running page number in the icon and inserts imported copies
	// of the page elements before the insert point.
	a.prototype.addPage = function (htmlDoc, page) {
		var e = document.getElementById("autopagerize_icon");
		// Was "black;" -- a trailing semicolon makes the value invalid CSS,
		// so the assignment was ignored.
		e.style.color = "black";
		e.innerHTML = Number(++this.pageNum);
		var self = this;
		return page.map(function(i) {
			var pe = document.importNode(i, true);
			self.insertPoint.parentNode.insertBefore(pe, self.insertPoint);
			return pe;
		});
	};

	// Mirrors the loading state into `isLoading` (read by terminate above)
	// and colours the icon accordingly.
	a.prototype.showLoading = function(sw) {
		isLoading = sw;
		this.icon.style.background = sw ? COLOR['loading'] : COLOR['on'];
	};
}

/**
 * GM_xmlhttpRequest onload handler for the speculated next page.
 *
 * Steps:
 *   1. Parse the response into a document and mark, on the live document,
 *      how much text of each subtree is absent from the next page
 *      (markDiffAmounts).
 *   2. Pick the container element with findPageElement() and, if every
 *      changed child shares an id or class, narrow the XPath to it.
 *   3. Build a one-entry SITEINFO array and start AutoPagerize with it.
 *   4. Append a small "rate this speculation" UI to the AutoPagerize help
 *      box; a click posts the rating and the SITEINFO to the rating server.
 *
 * Errors after step 1's parsing are caught and logged, not rethrown.
 *
 * @param {Object} res  Response object; only res.responseText is read.
 */
function analyzeNextpage(res) {
	var doc2 = callAPRize('createHTMLDocumentByString', [res.responseText]);

	try {
		var doc1 = unsafeWindow.document;

		markDiffAmounts(doc1.documentElement, doc2.documentElement);

		var pageElement = findPageElement( doc1.documentElement );
		var pageElementChildren = $X('./*[number(@__diff) > 0]', pageElement);

		// Count how many changed children carry each id / class. The XPath
		// step is stored next to the counter so the key never has to be
		// parsed back apart.
		var stat = {};
		var tally = function (key, step) {
			if ( !stat[key] ) {
				stat[key] = { count: 0, step: step };
			}
			stat[key].count++;
		};
		pageElementChildren.forEach( function (e) {
			var tag = e.nodeName.toLowerCase();
			var id  = e.getAttribute('id') || '';
			if ( id ) {
				tally(tag + "#" + id, tag + '[@id="' + id + '"]');
			}

			var classes = e.getAttribute('class') || '';
			classes.split(/\s+/).forEach( function (c) {
				// Leading/trailing whitespace yields empty tokens; skip them.
				if ( c ) {
					tally(tag + "." + c, tag + '[contains(@class,"' + c + '")]');
				}
			} );
		} );

		// The first id/class shared by every changed child wins.
		var childrenStep = null;
		for ( var key in stat ) {
			if ( stat[key].count == pageElementChildren.length ) {
				childrenStep = stat[key].step;
				break;
			}
		}
		if ( !childrenStep ) {
			// Without a child selector, climb out of list/table internals.
			while ( pageElement.nodeName.match(/^(tbody|li|tr|td|dt|dd)$/i) ) {
				pageElement = pageElement.parentNode;
			}
		}

		var nextLinkXPath = DOMDiff.get_neat_nodePath(nextLink.node, nextLink.predicates);
		if ( nextLinkXPath == '/' ) {
			message.push('get_neat_nodePath(nextLinkXPath) returned "/".');
			nextLinkXPath = '//*';
		}

		var xpath = DOMDiff.get_neat_nodePath(pageElement);
		if ( childrenStep ) {
			xpath += "/" + childrenStep;
		}
		if ( xpath == '/' ) {
			// This diagnostic used to repeat the nextLink message verbatim.
			message.push('get_neat_nodePath(pageElement) returned "/".');
			xpath = '//*';
		}

		var siteinfo = [
			{
				url:	document.location.href.replace(/\?.*/, ''),
				nextLink:	nextLinkXPath,
				pageElement:	xpath
			}
		];
		console.log("pageElement", pageElement , xpath);
		console.log("siteinfo", siteinfo);
		callAPRize('launchAutoPager', [siteinfo]);
		console.log("launchAutoPager.");

		// Add the speculation rating UI. Built from a plain string: the E4X
		// XML literal used before is a syntax error in current engines.
		var helpBox = document.getElementById("autopagerize_help");
		var d = document.createElement('div');
		d.innerHTML = [
			'<div>',
			'<div>Rate this speculation</div>',
			'<ul id="__siteinfo_speculator" style="font-size: 14px; line-height: 150%; list-style-type: none; font-weight: bold">',
			'<li><a href="javascript:void(1)" id="__siteinfo_speculator_p2">perfect</a></li>',
			'<li><a href="javascript:void(2)" id="__siteinfo_speculator_p1">ok</a></li>',
			'<li><a href="javascript:void(3)" id="__siteinfo_speculator_p0">messy</a></li>',
			'</ul>',
			'</div>'
		].join('\n');
		helpBox.appendChild(d);

		[2, 1, 0].forEach( function (n) {
			var link = document.getElementById("__siteinfo_speculator_p" + n);

			link.addEventListener( 'click', function (ev) {
				// The request is issued from a timer rather than directly in
				// the click handler, as in the original implementation.
				window.setTimeout(function() {
					GM_xmlhttpRequest( {
						url: "http://ido.nu/kuma/speculator/ratings/add",
						method: "POST",
						headers: {"Content-Type":"application/x-www-form-urlencoded"},
						data: [
							"data[Rating][url]=" + encodeURIComponent(document.location.href),
							"data[Rating][rate]=" + (n * 5),
							"data[Rating][ua]=" + encodeURIComponent(window.navigator.userAgent),
							"data[Rating][nextLink]=" + encodeURIComponent(siteinfo[0].nextLink),
							"data[Rating][pageElement]=" + encodeURIComponent(siteinfo[0].pageElement),
							"data[Rating][message]=" + encodeURIComponent(message)
						].join("&"),
						onload: function (res) {
							var spec = document.getElementById('__siteinfo_speculator');
							spec.parentNode.innerHTML = '<div style="font-size: 150%;">thx.</div>';
						}
					} );
				}, 0);
			}, false);
		} );

	} catch(e) { console.log(e); }
}

/**
 * Walks down from `root` towards the subtree that holds most of the text
 * marked as different (the "__diff" attributes set by markDiffAmounts).
 *
 * At each level:
 *   - if exactly one child carries a __diff attribute, descend into it;
 *   - otherwise descend into the first child whose __diff is more than half
 *     of the current node's __diff;
 *   - if no child dominates, stop.
 * The walk is capped at 200 steps as a guard against endless loops.
 *
 * @param {Element} root  Element to start from.
 * @returns {Element} The element where the walk stopped.
 */
function findPageElement( root ) {
	var DOMINANCE_THRESHOLD = 0.5;
	var current = root;

	for ( var stepsLeft = 200; stepsLeft > 0; stepsLeft-- ) {
		var total = current.getAttribute("__diff");
		var diffAttrs = $X("./child::*/@__diff", current);

		var chosen = null;
		if ( diffAttrs.length == 1 ) {
			chosen = diffAttrs[0];
		} else {
			var dominant = diffAttrs.filter( function (attr) {
				return ( attr.nodeValue / total ) > DOMINANCE_THRESHOLD;
			} );
			if ( dominant.length ) {
				chosen = dominant[0];
			}
		}

		if ( !chosen ) {
			break;
		}
		current = chosen.ownerElement;
	}
	return current;
}



/**
 * Marks on `doc1` how much of its text does not occur in `doc2`.
 *
 * Every non-blank text node of doc1 whose text (nodeHash) is not found among
 * doc2's text nodes contributes `text length + 1`; that amount is added to
 * the "__diff" attribute of each ancestor element up to, and including, the
 * element directly below the document node.
 *
 * Text nodes whose parent is one of IMG, PARAM, NOEMBED, NOSCRIPT, EMBED,
 * BR, SCRIPT or STYLE are excluded by the XPath.
 * NOTE(review): the names are compared in upper case against local-name();
 * whether that matches depends on how the engine reports element names for
 * the document type -- confirm.
 *
 * @param {Node} doc1  Context node of the current page; gets annotated.
 * @param {Node} doc2  Context node of the page to compare against; only read.
 */
function markDiffAmounts(doc1, doc2) {
	var ignoredParents = ['IMG', 'PARAM', 'NOEMBED', 'NOSCRIPT', 'EMBED', 'BR', 'SCRIPT', 'STYLE'];
	// Plain concatenation replaces the non-standard String.prototype.quote().
	var predicates = ignoredParents.map( function (n) {
		return 'not(local-name()="' + n + '")';
	} ).join(" and ");
	var xpath = '//text()[not(normalize-space(.)="") and parent::*[ ' + predicates + " ]]";
	var doc1tails = $X(xpath, doc1);
	var doc2tails = $X(xpath, doc2);

	// Keys are prefixed so page text such as "constructor" or "__proto__"
	// cannot collide with properties inherited from Object.prototype.
	var seen = {};
	doc2tails.forEach( function (node) {
		seen['t:' + nodeHash(node)] = true;
	} );

	var added = doc1tails.filter( function (node) {
		return seen['t:' + nodeHash(node)] !== true;
	} );

	added.forEach( function (node) {
		var score = node.textContent.length + 1;

		if ( node.nodeType == node.ELEMENT_NODE )
			node.setAttribute('__diff', score);

		// Accumulate the score on every ancestor element; stop below the
		// document node, or when the chain ends (detached subtree).
		while ( node.parentNode && node.parentNode.nodeType != node.DOCUMENT_NODE ) {
			var n = node.parentNode.getAttribute('__diff') || 0;
			node.parentNode.setAttribute('__diff', Number(n) + score);

			node = node.parentNode;
		}
	} );
}


/**
 * Key used to decide whether two nodes carry the same text: the node's
 * textContent converted to a string.
 *
 * @param {Node} node  Node to derive the key from.
 * @returns {string} String form of node.textContent.
 */
function nodeHash( node ) {
	var content = node.textContent;
	return String(content);
}

/**
 * Counts the nodes selected by an XPath expression: the expression is
 * wrapped in count(...) and evaluated through $X with Number passed as the
 * third argument.
 *
 * @param {string} xpath    XPath expression to count.
 * @param {Node}   context  Context node handed to $X.
 * @returns {*} Whatever $X returns for the count(...) expression.
 */
function xN (xpath, context) {
	return $X( 'count(' + xpath + ')', context, Number );
}


/*
Copyright (c) 2007 KUMAGAI Kentaro, GMO Internet lab.
http://labs.gmo.jp/blog/ku/

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
*/
// messy and fool.
	// Namespace object for the XPath-generation helper attached below.
	// Declared with `var`: a bare assignment creates an implicit global and
	// throws a ReferenceError in strict-mode code.
	var DOMDiff = function () {};

	/**
	 * Counts the following siblings of `node` that have the same
	 * (lower-cased) node name.
	 *
	 * @param {Node}   node        Node whose following siblings are counted.
	 * @param {string} predicates  Optional text appended verbatim to the
	 *                             XPath step.
	 * @returns {*} The count as returned by xN().
	 */
	function getFollwingSiblingCount(node, predicates) {
		var step = './following-sibling::' + node.nodeName.toLowerCase();
		var suffix = predicates ? predicates : '';
		return xN(step + suffix, node);
	}

	/**
	 * Computes the 1-based position of `node` among siblings with the same
	 * (lower-cased) node name: the number of matching preceding siblings
	 * plus one.
	 *
	 * @param {Node}   node        Node whose position is wanted.
	 * @param {string} predicates  Optional text appended verbatim to the
	 *                             XPath step.
	 * @returns {number} Preceding-sibling count from xN() plus 1.
	 */
	function get_position(node, predicates) {
		var step = './preceding-sibling::' + node.nodeName.toLowerCase();
		var suffix = predicates ? predicates : '';
		var preceding = xN(step + suffix, node);
		return preceding + 1;
	}

	/**
	 * Builds a compact XPath for `node`.
	 *
	 * Resolution order:
	 *   1. "/" when the node's parent is the document node or the node is
	 *      <body>. Callers prepend this to "/tag[...]", which yields a
	 *      "//tag[...]" expression.
	 *   2. //tag[@id="..."] when that expression selects exactly one node.
	 *   3. //tag[@class="..."] (single class) or //tag[contains(@class, '...')]
	 *      for the first class that selects exactly one node.
	 *   4. Otherwise the parent's path (computed without predicates) followed
	 *      by tag[N], or tag[last()] when no same-named sibling follows.
	 *
	 * `predicates` is applied only in case 4.
	 *
	 * @param {Element} node        Element to describe.
	 * @param {string}  predicates  Optional extra XPath condition for the
	 *                              final step.
	 * @returns {string} XPath expression, or "/" (see case 1).
	 */
	DOMDiff.get_neat_nodePath = function (node, predicates) {
		var tagName = node.nodeName.toLowerCase();

		if ( node.parentNode && (
			node.parentNode.nodeType == node.DOCUMENT_NODE ||
			tagName == 'body'
		) ) {
			return "/";
		}

		var exp;

		var id = node.getAttribute("id");
		if ( id ) {
			exp = '//' + tagName + '[@id="' + id + '"]';
			if ( xN(exp, node.ownerDocument) == 1 ) {
				return exp;
			}
		}

		var class_attribute = node.getAttribute("class");
		if ( class_attribute ) {
			// FIXME:
			// contains() also matches class names that merely include this
			// one as a substring; XPath 1.0 offers no simple token match.
			var classes = class_attribute.split(/\s+/);
			for ( var i = 0; i < classes.length ; i++ ) {
				var classname = classes[i];
				// Leading/trailing whitespace yields empty tokens; skip them.
				if ( !classname ) {
					continue;
				}
				var cond = ( classes.length == 1 ) ?
					( '@class="'+ classname + '"' ) :
					( "contains(@class, '"+ classname  +"')" );
				exp = '//' + tagName + "[" + cond + "]";

				if ( xN(exp, node.ownerDocument) == 1 ) {
					return exp;
				}
			}
		}

		// Positional fallback.
		var condition = get_position(node);
		if ( getFollwingSiblingCount(node) == 0 ) {
			condition = 'last()';
		}
		if ( predicates ) {
			// A bare number or last() is a position test only when it is the
			// whole predicate. Combined with "and" it is converted to a
			// boolean (always true), so the position must be spelled out.
			condition = 'position()=' + condition + ' and ' + predicates;
		}

		return DOMDiff.get_neat_nodePath(node.parentNode) + "/" + tagName +
			"[" + condition + "]" ;
	};


