#version 430 compatibility

#extension GL_ARB_compute_shader : enable
#extension GL_ARB_shader_storage_buffer_object : enable

/*-------------------- LBM model data -------------------------------------------------------------------------*/
// Lattice size in cells. main() addresses cell (i, j) as i + j*width.
#define width 640
#define height 320
// Number of distribution slots stored per cell in f0/f1; slot k of a cell
// lives at k + NUM_VECTORS*cell.
#define NUM_VECTORS 9
// exK / eyK: x and y offset of lattice direction K
// (0 = rest, 1-4 = axis-aligned neighbours, 5-8 = diagonal neighbours).
#define ex0 0
#define ex1 1
#define ex2 0
#define ex3 -1
#define ex4 0
#define ex5 1
#define ex6 -1
#define ex7 -1
#define ex8 1
#define ey0 0
#define ey1 0
#define ey2 1
#define ey3 0
#define ey4 -1
#define ey5 1
#define ey6 1
#define ey7 -1
#define ey8 -1
// invK: index of the direction whose (ex, ey) offset is the negation of
// direction K's offset, e.g. inv1 = 3 because (ex3, ey3) = -(ex1, ey1).
#define inv0 0
#define inv1 3
#define inv2 4
#define inv3 1
#define inv4 2
#define inv5 7
#define inv6 8
#define inv7 5
#define inv8 6
// Cell-type values stored in F. main() only ever compares against C_FLD and
// treats every other value as a non-fluid cell; C_BND is presumably the
// boundary/obstacle marker written by the host -- confirm against the host code.
#define C_FLD 1
#define C_BND 0

// Shader storage buffers, one element (or NUM_VECTORS elements for f0/f1) per lattice cell.
// F: per-cell type flag, read by main().
layout( binding=0 ) buffer dcF { int F[  ]; };
// NOTE(review): U, V and R are declared but not accessed by main() in this shader;
// presumably velocity components and density shared with other passes -- confirm.
layout( binding=1 ) buffer dcU { float U[  ]; };
layout( binding=2 ) buffer dcV { float V[  ]; };
layout( binding=3 ) buffer dcR { float R[  ]; };
// f0: distributions read by main(); f1: distributions written by main().
layout( binding=4 ) buffer df0 { float f0[  ]; };
layout( binding=5 ) buffer df1 { float f1[  ]; };

// Work-group shape: 32 x 32 x 1 invocations. main() maps one invocation to one
// lattice cell through gl_GlobalInvocationID.xy; the number of groups dispatched
// is chosen by the host and is not visible here.
layout( local_size_x = 32, local_size_y = 32, local_size_z = 1 ) in;

// Periodic wrap for a lattice coordinate that has stepped off [0, nx].
//
//   x  : coordinate after adding a direction offset
//   nx : largest valid coordinate (callers pass width-1 or height-1)
//
// Returns nx for any negative x, 0 for any x greater than nx, and x itself
// otherwise. This is an exact periodic wrap only for single-cell steps
// (x == -1 or x == nx+1); larger overshoots are snapped to the opposite edge
// rather than wrapped by their distance (that would be x+nx+1 / x-nx-1).
int per(int x, int nx)
{
	if( x < 0 )
		return nx;

	return ( x > nx ) ? 0 : x;
}

// Per-direction lattice offsets, gathered from the exK / eyK macros so the
// streaming step can iterate over directions instead of repeating one stanza
// per direction.
const int DIR_X[NUM_VECTORS] = int[NUM_VECTORS]( ex0, ex1, ex2, ex3, ex4, ex5, ex6, ex7, ex8 );
const int DIR_Y[NUM_VECTORS] = int[NUM_VECTORS]( ey0, ey1, ey2, ey3, ey4, ey5, ey6, ey7, ey8 );

// Slot pair used when a fluid cell finds a non-fluid neighbour in direction k:
//     f1[ BOUNCE_DST[k] + neighbour ] = f0[ BOUNCE_SRC[k] + self ]
// Entry 0 is unused (the rest direction never leaves the cell). The values
// reproduce the previous hand-unrolled code exactly.
//
// NOTE(review): direction 1 is the odd one out. It stores the cell's own slot 1
// into the neighbour's slot inv1, whereas directions 2-8 store the cell's slot
// invK into the neighbour's slot K. With a one-cell-thick wall that has fluid on
// both sides along x, directions 1 and 3 therefore both write slot 3 of the same
// wall cell from two different invocations. Only one convention can be intended.
// Because non-fluid cells forward their slot k to the fluid neighbour at +e_k
// (see the loop in main), the direction-1 form looks like the one that hands a
// population back to the cell it came from -- confirm against the collision /
// buffer-swap pass and then make both tables uniform.
const int BOUNCE_DST[NUM_VECTORS] = int[NUM_VECTORS]( 0, inv1, 2, 3, 4, 5, 6, 7, 8 );
const int BOUNCE_SRC[NUM_VECTORS] = int[NUM_VECTORS]( 0, 1, inv2, inv3, inv4, inv5, inv6, inv7, inv8 );

// Streaming pass: one invocation per lattice cell (i, j).
// Reads the cell's nine populations from f0 and writes them into f1:
//   - slot 0 stays in the cell;
//   - slot k (1..8) goes to the periodic neighbour in direction k when that
//     neighbour is fluid;
//   - when the neighbour is not fluid and this cell is fluid, the slot pair
//     from BOUNCE_DST / BOUNCE_SRC is written into the neighbour instead;
//   - a non-fluid cell writes nothing towards non-fluid neighbours.
void main()
{
	int i = int(gl_GlobalInvocationID.x);
	int j = int(gl_GlobalInvocationID.y);

	// Invocations outside the lattice must not run: with idx = i + j*width an
	// i >= width would alias a cell of the next row, and j >= height would
	// index past the last lattice cell.
	if( i >= width || j >= height )
		return;

	int idx = i+j*width;
	bool selfIsFluid = ( F[ idx ] == C_FLD );

	// Direction 0 has zero offset, so the population is copied in place.
	f1[ NUM_VECTORS*idx ] = f0[ NUM_VECTORS*idx ];

	for( int k = 1; k < NUM_VECTORS; ++k )
	{
		// Neighbour in direction k, wrapped periodically at the lattice edges.
		int ip = per( i + DIR_X[k], width-1 );
		int jp = per( j + DIR_Y[k], height-1 );
		int nb = jp * width + ip;

		if( F[ nb ] == C_FLD )
		{
			// Plain propagation into a fluid neighbour (done by every cell type).
			f1[ k + NUM_VECTORS*nb ] = f0[ k + NUM_VECTORS*idx ];
		}
		else if( selfIsFluid )
		{
			// Fluid cell facing a non-fluid neighbour: table-driven slot pair.
			f1[ BOUNCE_DST[k] + NUM_VECTORS*nb ] = f0[ BOUNCE_SRC[k] + NUM_VECTORS*idx ];
		}
	}
}
