// This file is part of the pdr/pdx project.
// Copyright (C) 2010 Torsten Mueller, Bern, Switzerland
//
// This program is free software: you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation, either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#include "../libpdrx/common.h"

using namespace std;
using namespace boost;
using namespace boost::posix_time;
using namespace boost::gregorian;
using namespace boost::program_options;

#include "../libpdrx/xception.h"
#include "../libpdrx/conversions.h"
#include "../libpdrx/config.h"
#include "db.h"
#include "in_impl.h"
#include "http.h"

#include <Poco/SAX/SAXParser.h>
#include <Poco/SAX/SAXException.h>
#include <Poco/SAX/ErrorHandler.h>
#include <Poco/SAX/ContentHandler.h>
#include <Poco/SAX/Attributes.h>
#include <Poco/SAX/InputSource.h>

using namespace Poco;
using namespace Poco::XML;

// Note: the two consumer strings below are constants that Twitter assigned
// when pdr was registered there as a third-party application. All pdr users
// share these two common keys; in addition every user needs two further,
// personalized keys.
// NOTE(review): the consumer secret is embedded in the source as plain text.
static const string consumer_key("rJkqCExigmwSC1aEIRSbDA");
static const string consumer_secret("Ifxi8iR08gwLuOkj9otyw8gCZxlwLThXBXZEipI0zs");

//=== TwitterRssClient =====================================================
// Creates the Twitter RSS input. The option key is handed on unchanged to
// the InputImpl base class constructor; nothing else is initialized here.
TwitterRssClient::TwitterRssClient (const string& option_key) : InputImpl(option_key) {}

namespace TwitterRssLocal {

	// Texts collected from the feed, keyed by the publication time of
	// their item. Being a map, it holds at most one text per timestamp
	// and iterates in ascending time order.
	typedef map<ptime, string> Expressions;

	// SAX error handler that turns every parser diagnostic into an
	// exception: each of the three callbacks passes its SAXException to
	// handle(), which raises it through THROW.
	class error_handler: public ErrorHandler
	{
		public:

		error_handler ();

		// all three callbacks delegate to handle(), each with its own
		// message prefix
		virtual void fatalError (const SAXException& exc);
		virtual void error (const SAXException& exc);
		virtual void warning (const SAXException& exc);
	};

	// Nothing to set up: the handler has no data members.
	error_handler::error_handler () {}

		void handle (const string& s, const SAXException& exc)
		{
			string msg(exc.displayText());
			string::size_type pos = msg.find("Exception: ");
			if (pos != string::npos)
				msg.erase(0, pos + 11);
			THROW(s + ": " + msg);
		}

	void error_handler::fatalError (const SAXException& exc)
	{
		handle("RSS fatal", exc);
	}

	void error_handler::error (const SAXException& exc)
	{
		handle("RSS error", exc);
	}

	void error_handler::warning (const SAXException& exc)
	{
		handle("RSS warning", exc);
	}

	// SAX content handler that walks the rss/channel/item structure of
	// the feed, accumulates the <description> and <pubDate> text of each
	// item and hands finished items to item_channel(), which stores them
	// in the Expressions map supplied by the caller.
	class content_handler: public ContentHandler
	{
		// item_channel() completes an item and therefore reads and
		// writes the private members below
		friend void item_channel (content_handler& );

		// nesting position inside the feed document
		enum State {none, rss, channel, item, description, pubDate};
		typedef stack<State> StateStack;

		// NOTE(review): stored by the constructor but not used anywhere
		// in the visible code
		SAXParser& m_parser;
		// stack of the currently open, recognized elements; the bottom
		// entry is always 'none'
		StateStack m_state;

		// result map owned by the caller, item_channel() inserts the
		// finished items here
		Expressions& m_expressions;
		// cut-off time: an item published at or before it ends the
		// parse, not_a_date_time disables the check
		ptime m_timestamp;
		// text accumulated for the <description> of the current item
		string m_description;
		// text accumulated for the <pubDate> of the current item
		string m_pubDate;

		public:

		// thrown by item_channel() to abort parsing at the cut-off time
		class Stopped {};

		content_handler (SAXParser& parser, Expressions& expressions, const ptime& timestamp);

		// only characters(), startElement() and endElement() do any
		// work, the remaining callbacks are empty
		virtual void characters (const XMLChar ch[], int start, int length);
		virtual void startDocument () {}
		virtual void endDocument () {}
		virtual void startElement (const XMLString& uri, const XMLString& localName, const XMLString& qname, const Attributes& attrList);
		virtual void endElement (const XMLString& uri, const XMLString& localName, const XMLString& qname);
		virtual void startPrefixMapping (const XMLString& prefix, const XMLString& uri) {}
		virtual void endPrefixMapping (const XMLString& prefix) {}
		virtual void ignorableWhitespace (const XMLChar ch[], int start, int length) {}
		virtual void processingInstruction (const XMLString& target, const XMLString& data) {}
		virtual void setDocumentLocator (const Locator* loc) {}
		virtual void skippedEntity (const XMLString& name) {}
	};

	// Binds the handler to the parser and to the result map, remembers
	// the cut-off timestamp and puts the state machine into state 'none'.
	content_handler::content_handler (SAXParser& parser, Expressions& expressions, const ptime& timestamp)
		: m_parser(parser)
		, m_expressions(expressions)
		, m_timestamp(timestamp)
	{
		// the state stack and both text buffers are default constructed
		// and therefore start out empty
		m_state.push(none);
	}

	void content_handler::characters (const XMLChar ch[], int start, int length)
	{
		// we must accumulate the stuff we get here, it really comes
		// in several pieces
		switch (m_state.top())
		{
			case description:	m_description += string(ch + start, length); break;
			case pubDate:		m_pubDate += string(ch + start, length); break;
			default:		break; // ignore everything else
		}
	}

	void content_handler::startElement (const XMLString& uri, const XMLString& localName, const XMLString& qname, const Attributes& attrList)
	{
		typedef void (*CallbackProc) (content_handler& , const XMLString& , const Attributes& );

		struct Data {
			State state;
			const char* keyword;
			State next;
		};

		static const Data data[] = {
			{none,		"rss",		rss},
			{rss,		"channel",	channel},
			{channel,	"item",		item},
			{item,		"description",	description},
			{item,		"pubDate",	pubDate}
		};

		for (size_t i = 0; i < sizeof(data) / sizeof(Data); i++)
		{
			if (m_state.top() == data[i].state && localName == data[i].keyword)
			{
				m_state.push(data[i].next);
				break;
			}
		}
	}

		// Completes one feed item: converts the collected <pubDate> text
		// into a timestamp, strips a leading "publisher: " part from the
		// collected <description> text and stores the result in the
		// expressions map. Both text buffers are emptied afterwards.
		//
		// Throws content_handler::Stopped, without storing anything, if
		// a cut-off timestamp is set and the item was published at or
		// before it.
		void item_channel (content_handler& ch)
		{
			const ptime published(lexical_cast_mime_date(ch.m_pubDate));

			const bool limited = ch.m_timestamp != not_a_date_time;
			if (limited && published <= ch.m_timestamp)
				throw content_handler::Stopped();

			// regardless of our native encoding and the encoding of the
			// feed the text here is strict UTF-8, so nothing is decoded

			// everything up to the first ": " is taken as the publisher
			// and removed, a text without this separator is kept as is
			static const regex rx("[^:]*: (.*)");
			smatch mr;
			const string text = regex_match(ch.m_description, mr, rx)
				? mr[1].str()
				: ch.m_description;

			ch.m_expressions.insert(Expressions::value_type(published, text));

			// make room for the next item
			ch.m_pubDate.clear();
			ch.m_description.clear();
		}

	void content_handler::endElement (const XMLString& uri, const XMLString& localName, const XMLString& qname)
	{
		typedef void (*CallbackProc) (content_handler& );

		struct Data {
			State state;
			const char* keyword;
			CallbackProc proc;
		};

		static const Data data[] = {
			{rss,		"rss",		NULL},
			{channel,	"channel",	NULL},
			{item,		"item",		item_channel},
			{description,	"description",	NULL},
			{pubDate,	"pubDate",	NULL}
		};

		for (size_t i = 0; i < sizeof(data) / sizeof(Data); i++)
		{
			if (m_state.top() == data[i].state && localName == data[i].keyword)
			{
				m_state.pop();
				if (data[i].proc)
					data[i].proc(*this);
				break;
			}
		}
	}

} // namespace TwitterRssLocal

// Reads the Twitter user timeline as RSS feed and stores the new items in
// the database.
//
// The feed is requested page by page through an OAuth HTTP connection and
// parsed with a SAX parser. Reading ends as soon as an item at or before
// the last known timestamp shows up or a page does not contribute any new
// expression. Every collected text is then parsed with InputImpl::Parse();
// texts that fail to parse are printed to encoded::cerr and saved as
// rejected. Finally the timestamp of the youngest collected item is saved
// as last RSS update.
//
// config   - supplies the "verbose" flag and "<option key>.proxy"
// database - source of the last timestamp, destination of all results
//
// Throws Xception, an unexpected exception during parsing is reported as
// "xml parse error".
void TwitterRssClient::Do (const Config& config, Database& database) const throw (Xception)
{
	bool verbose = config.GetBoolOption("verbose");
	if (verbose)
		encoded::cout << "looking for Twitter RSS feed" << endl;

	// get configuration data
	const string& proxy = config.GetStringOption(m_option_key + ".proxy");

	// open a HTTP connection
	OAuthHttpClient http(consumer_key, consumer_secret, config, m_option_key);
	http.Open("api.twitter.com", proxy);

	// retrieve the last point in history we did request this feed,
	// this should then be saved in the database, if we don't have
	// such a timestamp we let the database find the youngest
	// collection item we have at all, we stop reading data from
	// the feed at this point
	ptime timestamp = database.GetLastRssUpdate(http.GetUniqueFeedIdentifier());
	if (timestamp == not_a_date_time)
		timestamp = database.GetYoungestCollectionItemAtAll();

	// now request the feed and parse the items
	//
	// we do this in a loop because the feed gives us only 20 items
	// at once, so we must add a page parameter to the request to
	// get the rest
	TwitterRssLocal::Expressions expressions;
	{
		SAXParser parser;

		TwitterRssLocal::error_handler eh;
		parser.setErrorHandler(&eh);

		TwitterRssLocal::content_handler ch(parser, expressions, timestamp);
		parser.setContentHandler(&ch);

		// make all encodings we know about available to the parser
		EncodingNames names;
		GetEncodingNames(names);
		foreach (const string& name, names)
		{
			parser.addEncoding(name, &SpecificEncoding(name));
		}

		const string filename("http://api.twitter.com/1/statuses/user_timeline.rss");
		size_t page = 1;
		do
		{
			// do the HTTP request
			http.Request("GET", filename + "?page=" + lexical_cast<string>(page++));
			const string& response = http.Response();

			// parse the output
			try
			{
				size_t n = expressions.size();
				stringstream ss(response);
				InputSource isrc(ss);
				parser.parse(&isrc);

				// a page without any new expression ends the loop
				if (expressions.size() == n)
					break;
			}
			catch (const TwitterRssLocal::content_handler::Stopped&)
			{
				// ok, no more expressions from the feed, stop here
				break;
			}
			CATCH_RETHROW("")
			catch (...)
			{
				THROW("xml parse error");
			}
		} while (true);
	}

	// now we can truly close the HTTP connection
	http.Close();

	// put all the received stuff into the database
	if (expressions.empty())
	{
		if (verbose)
			encoded::cout << "    no data in feed" << endl;
	}
	else
	{
		Database::Collections collections;
		database.GetCollections(collections);

		Database::CollectionsItems items;
		bool rejects = false;
		foreach(const TwitterRssLocal::Expressions::value_type& vt, expressions)
		{
			try
			{
				InputImpl::Parse(vt.second, vt.first, verbose, collections, items);
			}
			catch (const Xception& x)
			{
				// a text we cannot parse must not stop the
				// import, it is kept as rejected instead
				encoded::cerr << x.Message(Xception::brief) << endl;
				database.AddRejected(vt.first, vt.second);
				rejects = true;
			}
		}
		database.AddCollectionsItems(items);

		// save the timestamp of the youngest RSS item for later use,
		// the map is ordered by time and not empty here, so this is
		// the key of its last entry
		database.SetLastRssUpdate(http.GetUniqueFeedIdentifier(), expressions.rbegin()->first);

		if (rejects)
			encoded::cerr << "!!! at least one expression has been rejected, try -r to list rejections !!!" << endl;
	}
}
