/* Support for thread groups.
 * 
 * 29/9/99 JC
 *	- from thread.c
 * 23/10/03 JC
 * 	- threadgroup now has a kill flag as well
 */

/*

    This file is part of VIPS.
    
    VIPS is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */

/*

    These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk

 */

/* 
#define TIME_THREAD
#define DEBUG_IO
#define DEBUG_CREATE
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <vips/intl.h>

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>

#include <vips/vips.h>
#include <vips/thread.h>

#ifdef WITH_DMALLOC
#include <dmalloc.h>
#endif /*WITH_DMALLOC*/

#ifdef TIME_THREAD
/* Size of time buffers.
 */
#define IM_TBUF_SIZE (20000)
#endif /*TIME_THREAD*/

/* Maximum number of concurrent threads we allow. No reason for the limit,
 * it's just there to stop mad values for IM_CONCURRENCY killing the system.
 */
#define IM_MAX_THREADS 32

/* Name of environment variable we get concurrency level from.
 */
#define IM_CONCURRENCY "IM_CONCURRENCY"

/* Tile geometry ... as ints rather than consts, so we can tune during
 * debugging. Each is initialised from the matching IM__* default, and
 * im_threadgroup_create() reads them when it picks a render geometry from
 * an image's demand hint.
 */

/* Patch width and height used for IM_SMALLTILE images.
 */
int im__tile_width = IM__TILE_WIDTH;
int im__tile_height = IM__TILE_HEIGHT;

/* Patch height used for IM_FATSTRIP images (patches are full image width).
 */
int im__fatstrip_height = IM__FATSTRIP_HEIGHT;

/* Patch height used for IM_THINSTRIP and IM_ANY images (patches are full
 * image width).
 */
int im__thinstrip_height = IM__THINSTRIP_HEIGHT;

#ifndef HAVE_THREADS
/* With no gthread available there is nothing behind the g_thread_*() and
 * g_mutex_*() names, so we supply do-nothing replacements. <vips/thread.h> 
 * has #defines which point the g_ names at the functions below.
 */

/* Thread system start-up: nothing to do.
 */
void 
im__g_thread_init( GThreadFunctions *vtable ) 
{
}

/* Join: there is never a thread to wait for, so there is never a result.
 */
gpointer 
im__g_thread_join( GThread *dummy ) 
{ 
	return( NULL ); 
}

/* Create: never makes a thread, always hands back NULL.
 */
GThread *
im__g_thread_create_full( GThreadFunc d1, gpointer d2, gulong d3, 
	gboolean d4, gboolean d5, GThreadPriority d6, GError **d7 )
{ 
	return( NULL ); 
}

/* Mutex new: there is no mutex object, callers get NULL.
 */
GMutex *
im__g_mutex_new( void ) 
{ 
	return( NULL ); 
}

/* Mutex free, lock and unlock: all no-ops.
 */
void 
im__g_mutex_free( GMutex *d ) 
{
}

void 
im__g_mutex_lock( GMutex *d ) 
{
}

void 
im__g_mutex_unlock( GMutex *d ) 
{
}
#endif /*!HAVE_THREADS*/

/* Work out how many worker threads to use, from the IM_CONCURRENCY
 * environment variable. 
 *
 * If the variable is not set we use a single thread. If it is set but lies 
 * outside [1, IM_MAX_THREADS], we warn and use the value clipped into that 
 * range instead.
 *
 * Returns the thread count to use.
 *
 * FIXME ... this used to also pass (count + 1) to thr_setconcurrency() or
 * pthread_setconcurrency(), returning -1 if that failed. Those calls are
 * currently disabled.
 */
static int
get_concurrency( void )
{
	const char *setting = g_getenv( IM_CONCURRENCY );
	int requested;
	int clipped;

	/* Not set: stick to the minimum.
	 */
	if( !setting )
		return( 1 );

	/* A sensible value is used as it stands.
	 */
	requested = atoi( setting );
	if( requested >= 1 && requested <= IM_MAX_THREADS )
		return( requested );

	/* Out of range: pull it back in and tell the user what we did.
	 */
	clipped = IM_CLIP( 1, requested, IM_MAX_THREADS );
	im_warning( "get_concurrency: $%s set "
		"to %d; should lie in [1,%d]; "
		"defaulting to %d", 
		IM_CONCURRENCY, requested, IM_MAX_THREADS, clipped );

	return( clipped );
}

#ifdef TIME_THREAD
/* Save time buffers.
 */
static int
save_time_buffers( REGION *reg )
{
	int i;
	static int rn = 1;
	FILE *fp;
	char name[ 256 ];

	im_snprintf( name, 256, "time%d", rn++ );
	if( !(fp = fopen( name, "w" )) )
		error_exit( "unable to write to \"%s\"", name );
	for( i = 0; i < reg->tpos; i++ )
		fprintf( fp, "%lld\n%lld\n", reg->btime[i], reg->etime[i] );
	fclose( fp );

	return( 0 );
}
#endif /*TIME_THREAD*/

/* Junk a thread.
 *
 * Stops the worker attached to @thr (if there is one), destroys its 'go'
 * semaphore, frees its region and clears its links to the output region
 * and the threadgroup. With TIME_THREAD defined, any recorded timings are 
 * written out too.
 *
 * The im_thread_t itself is not freed here. @thr must not be NULL.
 */
static void
free_thread( im_thread_t *thr )
{
        /* Is there a thread running this region? Kill it!
         */
        if( thr->thread ) {
		/* Order matters: set the kill flag first, then release the 
		 * worker from its wait on 'go' so that it wakes, sees the 
		 * flag and leaves its loop. Only then can we join.
		 */
                thr->kill = -1;
		im_semaphore_up( &thr->go );
		(void) g_thread_join( thr->thread );
#ifdef DEBUG_CREATE
		printf( "free_thread: g_thread_join()\n" );
#endif /*DEBUG_CREATE*/
		thr->thread = NULL;
        }

	/* No worker can be waiting on 'go' now, so it can go.
	 */
	im_semaphore_destroy( &thr->go );

	/* reg is NULL if region creation failed in attach_thread().
	 */
	if( thr->reg ) {
		im_region_free( thr->reg );
		thr->reg = NULL;
	}

	/* These are borrowed pointers: just drop them.
	 */
	thr->oreg = NULL;
	thr->tg = NULL;

#ifdef TIME_THREAD
	/* Dump timings, if this worker got as far as having buffers.
	 */
	if( thr->btime )
		(void) save_time_buffers( thr );
#endif /*TIME_THREAD*/
}

/* One unit of work for one worker: fill the worker's region, then hand it
 * to the threadgroup's work function, if there is one. Runs in a worker
 * thread from thread_main_loop(), or directly in the calling thread from 
 * trigger_work() if we are built without threads.
 *
 * Failure is noted by setting thr->error to -1. If the error flag is set
 * (now or from an earlier unit), the work function is not called.
 */
static void
work_fn( im_thread_t *thr )
{
	im_threadgroup_t *group = thr->tg;
	int failed;

	/* Get pixels: either straight into part of the output region, or 
	 * into our own region.
	 */
	if( group->inplace ) 
		failed = im_prepare_to( thr->reg, thr->oreg, 
			&thr->pos, thr->x, thr->y );
	else 
		failed = im_prepare( thr->reg, &thr->reg->valid );
	if( failed )
		thr->error = -1;

	/* Nothing more to do if we are in error, or have no work function.
	 */
	if( thr->error )
		return;
	if( !group->work )
		return;

	if( group->work( thr->reg, thr->a, thr->b, thr->c ) )
		thr->error = -1;
}

#ifdef HAVE_THREADS
/* What runs as a thread ... loop, waiting to be told to fill our region.
 *
 * @a is the im_thread_t for this worker; attach_thread() passes it when it
 * creates the thread. Each time round the loop we mark ourselves idle, 
 * signal the threadgroup's 'idle' semaphore and block on our own 'go' 
 * semaphore. When released we either exit (kill flag set, see 
 * free_thread()) or run one unit of work with work_fn().
 *
 * Always returns NULL.
 */
static void *
thread_main_loop( void *a )
{
        im_thread_t *thr = (im_thread_t *) a;
	im_threadgroup_t *tg = thr->tg;

	for(;;) {
		/* Signal the main thread that we are idle, and block.
		 */
		thr->idle = -1;
		im_semaphore_up( &tg->idle );
		im_semaphore_down( &thr->go );

		/* Asked to exit?
		 */
		if( thr->kill )
			break;

#ifdef TIME_THREAD
		/* Note start time.
		 *
		 * NOTE(review): tpos is not checked against IM_TBUF_SIZE 
		 * here or below -- confirm runs can't exceed the buffers.
		 */
		if( thr->btime )
			thr->btime[thr->tpos] = gethrtime();
#endif /*TIME_THREAD*/

		/* Loop once.
		 */
		work_fn( thr ); 

#ifdef TIME_THREAD
		/* Note stop time.
		 */
		if( thr->etime ) {
			thr->etime[thr->tpos] = gethrtime();
			thr->tpos++;
		}
#endif /*TIME_THREAD*/
	}

        return( NULL );
}
#endif /*HAVE_THREADS*/

/* Build worker number @i for a threadgroup: allocate an im_thread_t local
 * to the group's image, give it a region on that image and, if we have
 * threads, start a thread running thread_main_loop() on it.
 *
 * The new im_thread_t is linked into tg->thr[i] as soon as its fields are
 * initialised, before the steps that can still fail, so a partly-built
 * worker stays reachable from the group.
 *
 * Returns 0 on success, -1 on error.
 */
static int
attach_thread( im_threadgroup_t *tg, int i )
{
	im_thread_t *thr;

	thr = IM_NEW( tg->im, im_thread_t );
	if( !thr )
		return( -1 );

	/* Everything starts empty: no region, no thread, no arguments, not
	 * killed, not in error, and idle.
	 */
	thr->reg = NULL;
	thr->oreg = NULL;
	thr->thread = NULL;
	thr->a = NULL;
	thr->b = NULL;
	thr->c = NULL;
	thr->idle = -1;
	thr->kill = 0;
	thr->error = 0;
	im_semaphore_init( &thr->go, 0, "go" );
#ifdef TIME_THREAD
	thr->btime = NULL;
	thr->etime = NULL;
	thr->tpos = 0;
#endif /*TIME_THREAD*/

	/* Join worker and group together.
	 */
	thr->tg = tg;
	tg->thr[i] = thr;

	/* The region this worker will compute into.
	 */
	thr->reg = im_region_create( tg->im );
	if( !thr->reg )
		return( -1 );

#ifdef TIME_THREAD
	/* Space for start and stop times.
	 */
	thr->btime = IM_ARRAY( tg->im, IM_TBUF_SIZE, hrtime_t );
	thr->etime = IM_ARRAY( tg->im, IM_TBUF_SIZE, hrtime_t );
	if( !thr->btime || !thr->etime )
		return( -1 );
#endif /*TIME_THREAD*/

#ifdef HAVE_THREADS
	/* Set the worker running. It is created joinable, since 
	 * free_thread() joins it.
	 */
	thr->thread = g_thread_create_full( thread_main_loop, thr, 
		IM__DEFAULT_STACK_SIZE, TRUE, FALSE, 
		G_THREAD_PRIORITY_NORMAL, NULL );
	if( !thr->thread ) {
		im_error( "attach_thread", _( "unable to create thread" ) );
		return( -1 );
	}

#ifdef DEBUG_CREATE
	printf( "attach_thread: g_thread_create_full()\n" );
#endif /*DEBUG_CREATE*/
#endif /*HAVE_THREADS*/

	return( 0 );
}

/* Kill all threads in a threadgroup, if there are any.
 *
 * Every worker in tg->thr[] is shut down with free_thread(), the array is
 * dropped from the group, and the group's 'idle' semaphore is destroyed 
 * and re-made at zero. Does nothing if the group has no thread array.
 *
 * Slots in tg->thr[] may be NULL: im_threadgroup_create() fills the array 
 * with NULLs and stops attaching at the first failure, and the group is
 * still freed when its image closes. We skip empty slots rather than 
 * passing NULL to free_thread(), which dereferences its argument.
 */
static void
kill_threads( im_threadgroup_t *tg )
{
	int i;

	if( !tg->thr ) 
		return;

	for( i = 0; i < tg->nthr; i++ ) 
		if( tg->thr[i] ) {
			free_thread( tg->thr[i] );
			tg->thr[i] = NULL;
		}
	tg->thr = NULL;

	/* Reset the idle semaphore.
	 */
	im_semaphore_destroy( &tg->idle );
	im_semaphore_init( &tg->idle, 0, "idle" );

#ifdef DEBUG_IO
	printf( "kill_threads: killed %d threads\n", tg->nthr );
#endif /*DEBUG_IO*/
}

/* Free a threadgroup.
 *
 * Stops all the group's workers, destroys its 'idle' semaphore and marks
 * the group as a zombie. im_threadgroup_create() registers this as a close 
 * callback on the group's image, so it may run again after an explicit 
 * call: a NULL or already-zombie group is ignored.
 *
 * Always returns 0.
 */
int
im_threadgroup_free( im_threadgroup_t *tg )
{
	/* Nothing to do for a missing or already-freed group. This test has
	 * to come before anything which looks inside tg, the debug print
	 * included.
	 */
	if( !tg || tg->zombie )
		return( 0 );

#ifdef DEBUG_IO
	/* %p rather than %x: a pointer need not fit in an unsigned int.
	 */
	printf( "im_threadgroup_free: \"%s\" (%p)\n", 
		tg->im->filename,
		(void *) tg );
#endif /*DEBUG_IO*/

	kill_threads( tg );

	im_semaphore_destroy( &tg->idle );
	tg->zombie = -1;

	return( 0 );
}

/* Attach a threadgroup to an image.
 *
 * Allocates a group local to @im, picks a patch geometry from the image's
 * demand hint, registers im_threadgroup_free() as a close callback on @im 
 * and attaches one worker per unit of concurrency (see get_concurrency()).
 *
 * Returns the new group, or NULL on error. Once the close callback is
 * registered, a group that fails later in construction is left for that 
 * callback to tidy up.
 */
im_threadgroup_t *
im_threadgroup_create( IMAGE *im )
{
	im_threadgroup_t *tg;
	int i;

	/* Allocate and init new thread block.
	 */
	if( !(tg = IM_NEW( im, im_threadgroup_t )) )
		return( NULL );
	tg->zombie = 0;
	tg->im = im;
	tg->work = NULL;
	tg->inplace = 0;
	if( (tg->nthr = get_concurrency()) < 0 )
		return( NULL );
	tg->thr = NULL;
	tg->kill = 0;

	/* Pick a render geometry.
	 */
	switch( tg->im->dhint ) {
	case IM_SMALLTILE:
		tg->pw = im__tile_width;
		tg->ph = im__tile_height;
		tg->nlines = tg->ph;
		break;

	case IM_FATSTRIP:
		tg->pw = tg->im->Xsize;
		tg->ph = im__fatstrip_height;
		tg->nlines = tg->ph * tg->nthr * 2;
		break;

	case IM_ANY:
	case IM_THINSTRIP:
		tg->pw = tg->im->Xsize;
		tg->ph = im__thinstrip_height;
		tg->nlines = tg->ph * tg->nthr * 2;
		break;

	default:
		error_exit( "panic: internal error #98i245983425" );
	}

#ifdef DEBUG_IO
	printf( "im_threadgroup_create: %d by %d patches, "
		"groups of %d scanlines\n", tg->pw, tg->ph, tg->nlines );
#endif /*DEBUG_IO*/

	/* Init locks. This must happen before im_threadgroup_free() can
	 * possibly run on this group, since that destroys the semaphore.
	 */
	im_semaphore_init( &tg->idle, 0, "idle" );

	/* Attach tidy-up callback. If we can't, tidy up ourselves now.
	 */
	if( im_add_close_callback( im, 
		(im_callback_fn) im_threadgroup_free, tg, NULL ) ) {
		(void) im_threadgroup_free( tg );
		return( NULL );
	}

	/* Make thread array. All slots start out NULL, so we can tell which
	 * workers were actually attached.
	 */
	if( !(tg->thr = IM_ARRAY( im, tg->nthr + 1, im_thread_t * )) )
		return( NULL );
	for( i = 0; i < tg->nthr + 1; i++ )
		tg->thr[i] = NULL;

	/* Attach threads.
	 */
	for( i = 0; i < tg->nthr; i++ )
		if( attach_thread( tg, i ) )
			return( NULL );

#ifdef DEBUG_IO
	/* %p rather than %x: a pointer need not fit in an unsigned int.
	 */
	printf( "im_threadgroup_create: \"%s\" (%p), with %d threads\n", 
		im->filename,
		(void *) tg,
		tg->nthr );
#endif /*DEBUG_IO*/

	return( tg );
}

/* Block until every worker in the group is idle again, ie. is back 
 * waiting on its 'go' semaphore.
 */
static void
threadgroup_wait( im_threadgroup_t *tg )
{
	im_semaphore_t *idle = &tg->idle;

	/* Each worker ups 'idle' once when it goes idle, so taking nthr from
	 * it means they have all arrived.
	 */
	im_semaphore_downn( idle, tg->nthr );

	/* That left 'idle' at zero. Put the nthr back, so the next 
	 * find_idle() sees all workers as available.
	 */
	im_semaphore_upn( idle, tg->nthr );
}

/* Wait until some worker is idle, and say which.
 *
 * Returns the index in tg->thr[] of the first worker with its idle flag 
 * set, or -1 (with an error message set) if none could be found.
 */
static int
find_idle( im_threadgroup_t *tg )
{
	int n;

	/* Blocks until at least one worker has signalled idle.
	 */
	im_semaphore_down( &tg->idle );

	/* Scan for the first one flagged as idle.
	 */
	n = 0;
	while( n < tg->nthr ) {
		if( tg->thr[n]->idle )
			return( n );

		n += 1;
	}

	im_errormsg( "find_idle: no idle threads" );

	return( -1 );
}

/* Set worker @i of the group going on one unit of work. With threads, this
 * just releases the worker; without, the work is done right here in the
 * caller before we return.
 */
static void
trigger_work( im_threadgroup_t *tg, int i )
{
	im_thread_t *worker = tg->thr[i];

	/* Mark as busy and release from the wait on 'go'.
	 */
	worker->idle = 0;
	im_semaphore_up( &worker->go );

#ifndef HAVE_THREADS
	/* There is no thread to pick this up ... do the work ourselves, then
	 * mark the worker as idle again.
	 */
	work_fn( worker );
	worker->idle = -1;
#endif /*HAVE_THREADS*/
}

/* Has anything gone wrong? True if the group or its image has been 
 * killed, or if any worker has its error flag set.
 *
 * Returns -1 if so, 0 if all is well.
 */
static int
any_errors( im_threadgroup_t *tg )
{
	int n;

	/* A kill on either the group or the image counts as an error.
	 */
	if( tg->kill || tg->im->kill )
		return( -1 );

	n = 0;
	while( n < tg->nthr ) {
		if( tg->thr[n]->error )
			return( -1 );

		n += 1;
	}

	return( 0 );
}

/* Fill the valid area of a large region by cutting it into patches of
 * tg->pw by tg->ph pixels and giving each patch to an idle worker. Workers
 * write their patch straight into @or.
 *
 * Returns 0 on success, -1 on error.
 */
int
im__eval_to_region( im_threadgroup_t *tg, REGION *or )
{
	Rect *area = &or->valid;
	Rect patch;
	int left, top;
	int n;

	/* Workers will be filling parts of one contiguous area.
	 */
	tg->inplace = 1;

	/* Step over the area a patch at a time.
	 */
	for( top = area->top; top < IM_RECT_BOTTOM( area ); 
		top += tg->ph )
		for( left = area->left; left < IM_RECT_RIGHT( area ); 
			left += tg->pw ) {
			im_thread_t *worker;

			n = find_idle( tg );
			if( n < 0 )
				return( -1 );
			worker = tg->thr[n];

			/* The patch this worker is to generate.
			 */
			patch.left = left;
			patch.top = top;
			patch.width = tg->pw;
			patch.height = tg->ph;

			/* Tell the worker where to write and what to make,
			 * and set it off.
			 */
			worker->oreg = or; 
			worker->pos = patch; 
			worker->x = patch.left;
			worker->y = patch.top;
			trigger_work( tg, n );

			/* Let any eval callbacks on the source image know 
			 * about progress.
			 */
			im__handle_eval( tg->im, tg->pw, tg->ph );

			/* On error, let the workers settle but leave them 
			 * alive: the caller may want to pull error
			 * information out of them.
			 */
			if( any_errors( tg ) ) {
				threadgroup_wait( tg );
				return( -1 );
			}
		}

	/* All patches are handed out ... wait for the workers to finish.
	 */
	threadgroup_wait( tg );

	return( any_errors( tg ) ? -1 : 0 );
}

/* Walk over the whole of an image in patches of tg->pw by tg->ph pixels,
 * having an idle worker prepare each patch in its own region ... used by
 * iterate.
 *
 * Returns 0 on success, -1 on error.
 */
int
im__eval_to_image( im_threadgroup_t *tg, IMAGE *im )
{
	Rect patch;
	int left, top;
	int n;

	/* Workers compute into their own regions, not a shared output.
	 */
	tg->inplace = 0;

	/* Step over the image a patch at a time.
	 */
	for( top = 0; top < im->Ysize; top += tg->ph )
		for( left = 0; left < im->Xsize; left += tg->pw ) {
			/* Wait for a free worker.
			 */
			n = find_idle( tg );
			if( n < 0 )
				return( -1 );

			/* The patch this worker is to generate.
			 */
			patch.left = left;
			patch.top = top;
			patch.width = tg->pw;
			patch.height = tg->ph;

			/* Move the worker's region to the patch.
			 */
			if( im_region_local( tg->thr[n]->reg, &patch ) )
				return( -1 );

			/* Set the worker off.
			 */
			trigger_work( tg, n );

			/* Let any eval callbacks on the image know about
			 * progress.
			 */
			im__handle_eval( im, tg->pw, tg->ph );

			/* On error, let the workers settle but leave them 
			 * alive: the caller may want to pull error
			 * information out of them.
			 */
			if( any_errors( tg ) ) {
				threadgroup_wait( tg );
				return( -1 );
			}
		}

	/* All patches are handed out ... wait for the workers to finish.
	 */
	threadgroup_wait( tg );

	/* Did any of them fail?
	 */
	return( any_errors( tg ) ? -1 : 0 );
}
