Chapter 3. GRU Software Functions

This chapter describes software functions that can be used on the global reference unit (GRU). For a description of the GRU, see Chapter 1, “Altix UV GRU Direct Access API”. This chapter describes a subset of the /usr/include/uv/gru/gru_instructions.h file.

Checking the Status of GRU Operations

This section describes software functions used for checking the status of GRU operations, as follows:

/* Out-of-line status check for a control block. */
extern int gru_check_status_proc(gru_control_block_t *cb);

/* Out-of-line wait on a control block. */
extern int gru_wait_proc(gru_control_block_t *cb);

/* Out-of-line wait on a control block; no status is returned. */
extern void gru_wait_abort_proc(gru_control_block_t *cb);

extern void gru_abort(int, gru_control_block_t *cb, char *str);

The gru_check_status_proc() and gru_wait_proc() functions return one of the following GRU control block status (CBS) values:

CBS_IDLE		
CBS_EXCEPTION	
CBS_ACTIVE
CBS_CALL_OS	

Displaying GRU Error Information

This section describes software functions used for displaying GRU error information, as follows:

/*
 * Produce a string describing a control block exception.
 *	ret	- NOTE(review): presumably the status value returned by a
 *		  check/wait call - confirm
 *	cb	- control block to describe
 *	buf	- caller-supplied character buffer
 *	size	- NOTE(review): presumably the size of buf in bytes - confirm
 * NOTE(review): the returned pointer is presumably buf - confirm against
 * the implementation.
 */
extern char *gru_get_cb_exception_detail_str(int ret, gru_control_block_t *cb,
				char *buf, int size);

GRU Data Transfer Functions

This section describes some GRU data transfer functions.

GRU data transfer functions have some arguments in common with each other:

xtype

xtype - datatype of the transfer. Choose from the following list:

XTYPE_B 

byte

XTYPE_S 

short (2-byte)

XTYPE_W 

word (4-byte)

XTYPE_DW 

doubleword (8-byte)

XTYPE_CL 

cacheline (64-byte)

exopc

exopc - extended opcode for atomic memory operations (AMO).

AMOs with implicit operand opcodes

	EOP_IR_FETCH	 /* Plain fetch of memory */
	EOP_IR_CLR	 /* Fetch and clear */
	EOP_IR_INC	 /* Fetch and increment */
	EOP_IR_DEC	 /* Fetch and decrement */
	EOP_IR_QCHK1	 /* Queue check, 64 byte msg */
	EOP_IR_QCHK2	 /* Queue check, 128 byte msg */

	Registered AMOs with implicit operand opcodes

	EOP_IRR_FETCH	 /* Registered fetch of memory */
	EOP_IRR_CLR	 /* Registered fetch and clear */
	EOP_IRR_INC	 /* Registered fetch and increment */
	EOP_IRR_DEC	 /* Registered fetch and decrement */
	EOP_IRR_DECZ	 /* Registered fetch and decrement, update on zero*/

	AMOs with explicit operand opcodes

	EOP_ER_SWAP	 /* Exchange argument and memory */
	EOP_ER_OR	 /* Logical OR with memory */
	EOP_ER_AND	 /* Logical AND with memory */
	EOP_ER_XOR	 /* Logical XOR with memory */
	EOP_ER_ADD	 /* Add value to memory */
	EOP_ER_CSWAP	 /* Compare with operand2, write operand1 if match*/
	EOP_ER_CADD	 /* Queue check, operand1*64 byte msg */

	Registered AMOs with explicit operand opcodes

	EOP_ERR_SWAP	 /* Exchange argument and memory */
	EOP_ERR_OR	 /* Logical OR with memory */
	EOP_ERR_AND	 /* Logical AND with memory */
	EOP_ERR_XOR	 /* Logical XOR with memory */
	EOP_ERR_ADD	 /* Add value to memory */
	EOP_ERR_CSWAP	 /* Compare with operand2, write operand1 if match*/

 AMOs with extended opcodes in DSR

	EOP_XR_CSWAP	 /* Masked compare exchange */

hints

	IMA_CB_DELAY	/* hold read responses until status changes */

Functions for GRU Instructions

This section contains functions for GRU instructions, as follows:

 - nelem and stride are in elements
 - tri0/tri1 are byte offsets from the beginning of the data segment.


/*
 * Start an OP_VLOAD instruction on a control block.
 *	cb	 - control block used to issue the instruction
 *	mem_addr - address stored in the instruction's baddr0 field
 *	tri0	 - data segment offset in bytes, passed to __opdword()
 *	xtype	 - datatype of the transfer (XTYPE_*)
 *	nelem	 - number of elements
 *	stride	 - stride in elements, stored in op1_stride
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_vload(gru_control_block_t *cb, void *mem_addr,
			     unsigned int tri0, unsigned char xtype,
			     unsigned long nelem, unsigned long stride,
			     unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)mem_addr;
	instr->nelem = nelem;
	instr->op1_stride = stride;
	gru_start_instruction(instr,
			      __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
					(unsigned long)tri0, hints));
}

/*
 * Start an OP_VSTORE instruction on a control block.
 *	cb	 - control block used to issue the instruction
 *	mem_addr - address stored in the instruction's baddr0 field
 *	tri0	 - data segment offset in bytes, passed to __opdword()
 *	xtype	 - datatype of the transfer (XTYPE_*)
 *	nelem	 - number of elements
 *	stride	 - stride in elements, stored in op1_stride
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_vstore(gru_control_block_t *cb, void *mem_addr,
			      unsigned int tri0, unsigned char xtype,
			      unsigned long nelem, unsigned long stride,
			      unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)mem_addr;
	instr->nelem = nelem;
	instr->op1_stride = stride;
	gru_start_instruction(instr,
			      __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
					tri0, hints));
}

/*
 * Start an OP_IVLOAD instruction on a control block.
 *	cb	 - control block used to issue the instruction
 *	mem_addr - address stored in the instruction's baddr0 field
 *	tri0	 - data segment offset in bytes, passed to __opdword()
 *	tri1	 - data segment offset in bytes, stored in tri1_bufsize_64
 *	xtype	 - datatype of the transfer (XTYPE_*)
 *	nelem	 - number of elements
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_ivload(gru_control_block_t *cb, void *mem_addr,
			      unsigned int tri0, unsigned int tri1,
			      unsigned char xtype, unsigned long nelem,
			      unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)mem_addr;
	instr->nelem = nelem;
	instr->tri1_bufsize_64 = tri1;
	gru_start_instruction(instr,
			      __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
					tri0, hints));
}

/*
 * Start an OP_IVSTORE instruction on a control block.
 *	cb	 - control block used to issue the instruction
 *	mem_addr - address stored in the instruction's baddr0 field
 *	tri0	 - data segment offset in bytes, passed to __opdword()
 *	tri1	 - data segment offset in bytes, stored in tri1_bufsize_64
 *	xtype	 - datatype of the transfer (XTYPE_*)
 *	nelem	 - number of elements
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_ivstore(gru_control_block_t *cb, void *mem_addr,
			       unsigned int tri0, unsigned int tri1,
			       unsigned char xtype, unsigned long nelem,
			       unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)mem_addr;
	instr->nelem = nelem;
	instr->tri1_bufsize_64 = tri1;
	gru_start_instruction(instr,
			      __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
					tri0, hints));
}

/*
 * Start an OP_VSET instruction on a control block.
 *	cb	 - control block used to issue the instruction
 *	mem_addr - address stored in the instruction's baddr0 field
 *	value	 - value stored in op2_value_baddr1
 *	xtype	 - datatype of the transfer (XTYPE_*)
 *	nelem	 - number of elements
 *	stride	 - stride in elements, stored in op1_stride
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_vset(gru_control_block_t *cb, void *mem_addr,
			    unsigned long value, unsigned char xtype,
			    unsigned long nelem, unsigned long stride,
			    unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)mem_addr;
	instr->op2_value_baddr1 = value;
	instr->nelem = nelem;
	instr->op1_stride = stride;
	gru_start_instruction(instr,
			      __opdword(OP_VSET, 0, xtype, IAA_RAM, 0,
					0, hints));
}

/*
 * Start an OP_IVSET instruction on a control block.
 *	cb	 - control block used to issue the instruction
 *	mem_addr - address stored in the instruction's baddr0 field
 *	tri1	 - data segment offset in bytes, stored in tri1_bufsize_64
 *	value	 - value stored in op2_value_baddr1
 *	xtype	 - datatype of the transfer (XTYPE_*)
 *	nelem	 - number of elements
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_ivset(gru_control_block_t *cb, void *mem_addr,
			     unsigned int tri1, unsigned long value,
			     unsigned char xtype, unsigned long nelem,
			     unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)mem_addr;
	instr->op2_value_baddr1 = value;
	instr->nelem = nelem;
	instr->tri1_bufsize_64 = tri1;
	gru_start_instruction(instr,
			      __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0,
					0, hints));
}

/*
 * Start an OP_VFLUSH instruction on a control block.
 *	cb	 - control block used to issue the instruction
 *	mem_addr - address stored in the instruction's baddr0 field
 *	nelem	 - number of elements
 *	xtype	 - datatype of the transfer (XTYPE_*)
 *	stride	 - stride in elements, stored in op1_stride
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_vflush(gru_control_block_t *cb, void *mem_addr,
			      unsigned long nelem, unsigned char xtype,
			      unsigned long stride, unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)mem_addr;
	instr->op1_stride = stride;
	instr->nelem = nelem;
	gru_start_instruction(instr,
			      __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
					0, hints));
}

/*
 * Start an OP_NOP instruction on a control block. No instruction fields
 * are written; only the hints are passed through to __opdword().
 */
static inline void gru_nop(gru_control_block_t *cb, int hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	gru_start_instruction(instr,
			      __opdword(OP_NOP, 0, 0, 0, 0, 0, hints));
}


/*
 * Start an OP_BCOPY instruction on a control block.
 *	cb	- control block used to issue the instruction
 *	src	- source address, stored in baddr0
 *	dest	- destination address, stored in op2_value_baddr1
 *	tri0	- data segment offset in bytes, passed to __opdword()
 *	xtype	- datatype of the transfer (XTYPE_*)
 *	nelem	- number of elements
 *	bufsize	- stored in tri1_bufsize_64; the workaround checks below
 *		  scale it by 64 bytes
 *	hints	- instruction hints, passed to __opdword()
 *
 * When UV_REV_1_WARS is defined, gru_abort_bcopy_war() is called with
 * code 0, 1 or 2 for tri0/bufsize combinations rejected by the GRU 1.0
 * workarounds.
 */
static inline void gru_bcopy(gru_control_block_t *cb, const void *src,
			     void *dest, unsigned int tri0,
			     unsigned int xtype, unsigned long nelem,
			     unsigned int bufsize, unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

#ifdef UV_REV_1_WARS
	/* Buffer must end below the 8192-byte mark */
	if (tri0 + bufsize * 64 >= 8192)
		gru_abort_bcopy_war(0);
	/* GRU 1.0 WAR: buffer end must not be a multiple of 8192 */
	if (((tri0 + bufsize * 64) & 8191) == 0)
		gru_abort_bcopy_war(1);
	/* GRU 1.0 WAR: bufsize is limited to 128 */
	if (bufsize > 128)
		gru_abort_bcopy_war(2);
#endif
	instr->baddr0 = (long)src;
	instr->op2_value_baddr1 = (long)dest;
	instr->nelem = nelem;
	instr->tri1_bufsize_64 = bufsize;
	gru_start_instruction(instr,
			      __opdword(OP_BCOPY, 0, xtype, IAA_RAM,
					IAA_RAM, tri0, hints));
}

/*
 * Start an OP_BSTORE instruction on a control block.
 *	cb	- control block used to issue the instruction
 *	src	- source address, stored in baddr0
 *	dest	- destination address, stored in op2_value_baddr1
 *	tri0	- data segment offset in bytes, passed to __opdword()
 *	xtype	- datatype of the transfer (XTYPE_*)
 *	nelem	- number of elements
 *	hints	- instruction hints, passed to __opdword()
 */
static inline void gru_bstore(gru_control_block_t *cb, const void *src,
			      void *dest, unsigned int tri0,
			      unsigned int xtype, unsigned long nelem,
			      unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)src;
	instr->op2_value_baddr1 = (long)dest;
	instr->nelem = nelem;
	gru_start_instruction(instr,
			      __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
					tri0, hints));
}

/*
 * Start an OP_GAMIR instruction (AMO with implicit operand).
 *	cb	- control block used to issue the instruction
 *	exopc	- extended opcode (EOP_IR_*)
 *	src	- address stored in baddr0
 *	xtype	- datatype of the operation (XTYPE_*)
 *	hints	- instruction hints, passed to __opdword()
 */
static inline void gru_gamir(gru_control_block_t *cb, int exopc, void *src,
			     unsigned int xtype, unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)src;
#ifdef UV_REV_1_WARS
	/* GRU 1.0 WAR: nelem is forced to 1 */
	instr->nelem = 1;
#endif
	gru_start_instruction(instr,
			      __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
					0, hints));
}

/*
 * Start an OP_GAMIRR instruction (registered AMO with implicit operand).
 *	cb	- control block used to issue the instruction
 *	exopc	- extended opcode (EOP_IRR_*)
 *	src	- address stored in baddr0
 *	xtype	- datatype of the operation (XTYPE_*)
 *	hints	- instruction hints, passed to __opdword()
 */
static inline void gru_gamirr(gru_control_block_t *cb, int exopc, void *src,
			      unsigned int xtype, unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)src;
#ifdef UV_REV_1_WARS
	/* GRU 1.0 WAR: nelem is forced to 1 */
	instr->nelem = 1;
#endif
	gru_start_instruction(instr,
			      __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
					0, hints));
}

/*
 * Start an OP_GAMER instruction (AMO with explicit operands).
 *	cb	 - control block used to issue the instruction
 *	exopc	 - extended opcode (EOP_ER_*)
 *	src	 - address stored in baddr0
 *	xtype	 - datatype of the operation (XTYPE_*)
 *	operand1 - stored in op1_stride
 *	operand2 - stored in op2_value_baddr1
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_gamer(gru_control_block_t *cb, int exopc, void *src,
			     unsigned int xtype, unsigned long operand1,
			     unsigned long operand2, unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)src;
	instr->op1_stride = operand1;
	instr->op2_value_baddr1 = operand2;
#ifdef UV_REV_1_WARS
	/* GRU 1.0 WAR: nelem is forced to 1 */
	instr->nelem = 1;
#endif
	gru_start_instruction(instr,
			      __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
					0, hints));
}

/*
 * Start an OP_GAMERR instruction (registered AMO with explicit operands).
 *	cb	 - control block used to issue the instruction
 *	exopc	 - extended opcode (EOP_ERR_*)
 *	src	 - address stored in baddr0
 *	xtype	 - datatype of the operation (XTYPE_*)
 *	operand1 - stored in op1_stride
 *	operand2 - stored in op2_value_baddr1
 *	hints	 - instruction hints, passed to __opdword()
 */
static inline void gru_gamerr(gru_control_block_t *cb, int exopc, void *src,
			      unsigned int xtype, unsigned long operand1,
			      unsigned long operand2, unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)src;
	instr->op1_stride = operand1;
	instr->op2_value_baddr1 = operand2;
#ifdef UV_REV_1_WARS
	/* GRU 1.0 WAR: nelem is forced to 1 */
	instr->nelem = 1;
#endif
	gru_start_instruction(instr,
			      __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
					0, hints));
}

/*
 * Start an OP_GAMXR instruction with the EOP_XR_CSWAP extended opcode
 * and XTYPE_DW datatype; nelem is fixed at 4.
 *	cb	- control block used to issue the instruction
 *	src	- address stored in baddr0
 *	tri0	- NOTE(review): accepted but not used; 0 is passed to
 *		  __opdword() in the tri0 position - confirm this is intended
 *	hints	- instruction hints, passed to __opdword()
 */
static inline void gru_gamxr(gru_control_block_t *cb, void *src,
			     unsigned int tri0, unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)src;
	instr->nelem = 4;
	gru_start_instruction(instr,
			      __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
					IAA_RAM, 0, 0, hints));
}

/*
 * Start an OP_MESQ instruction with the XTYPE_CL (cacheline) datatype.
 *	cb	- control block used to issue the instruction
 *	queue	- queue address, stored in baddr0
 *	tri0	- data segment offset in bytes, passed to __opdword()
 *	nelem	- number of elements (cachelines)
 *	hints	- instruction hints, passed to __opdword()
 */
static inline void __gru_mesq(gru_control_block_t *cb, void *queue,
			      unsigned long tri0, unsigned long nelem,
			      unsigned long hints)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	instr->baddr0 = (long)queue;
	instr->nelem = nelem;
	gru_start_instruction(instr,
			      __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
					tri0, hints));
}
/*
 * gru_mesq - issue a message queue (MESQ) instruction.
 *	cb	- control block used to issue the instruction
 *	queue	- queue address
 *	tri0	- data segment offset in bytes
 *	nelem	- number of elements (cachelines)
 *	hints	- instruction hints
 *
 * Without UV_REV_1_WARS this is an inline wrapper around __gru_mesq().
 * With UV_REV_1_WARS defined, only the prototype is declared here and the
 * function is provided out of line.
 */
#if !defined(UV_REV_1_WARS)
static inline void gru_mesq(gru_control_block_t *cb, void *queue,
		unsigned long tri0, unsigned long nelem,
		unsigned long hints)
{
	__gru_mesq(cb, queue, tri0, nelem, hints);
}
#else
extern void gru_mesq(gru_control_block_t *cb, void *queue,
		unsigned long tri0, unsigned long nelem,
		unsigned long hints);
#endif

/* Return the avalue field of the instruction in control block cb. */
static inline unsigned long gru_get_amo_value(gru_control_block_t *cb)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	return instr->avalue;
}

/* Return the low 32 bits of the avalue field (the "head" half). */
static inline int gru_get_amo_value_head(gru_control_block_t *cb)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	return instr->avalue & 0xffffffff;
}

/* Return the avalue field shifted right by 32 bits (the "limit" half). */
static inline int gru_get_amo_value_limit(gru_control_block_t *cb)
{
	struct gru_instruction *instr = (struct gru_instruction *)cb;

	return instr->avalue >> 32;
}

static inline union gru_mesqhead  gru_mesq_head(int head, int limit) {
	union gru_mesqhead mqh;

	mqh.head = head;
	mqh.limit = limit;
	return mqh;
}

/*
 * NOTE(review): presumably the buffer size, in bytes, to use for exception
 * detail strings (see gru_get_cb_exception_detail_str()) - confirm.
 */
#define GRU_EXC_STR_SIZE		1024


/*
 * Control block definition for checking status.
 *
 * Control block pointers are cast to this layout by the status accessors.
 * The bitfield widths add up to 64 bits.
 */
struct gru_control_block_status {
	unsigned int icmd       : 1;
	unsigned int ima        : 3;
	unsigned int reserved0  : 4;
	unsigned int unused1    : 24;
	unsigned int unused2    : 24;
	unsigned int istatus    : 2;	/* instruction status (CBS_* value) */
	unsigned int isubstatus : 4;	/* instruction substatus */
	unsigned int unused3    : 2;
};

/* Get CB status: returns the istatus field of the control block. */
static inline int gru_get_cb_status(gru_control_block_t *cb)
{
	struct gru_control_block_status *status = (struct gru_control_block_status *)cb;

	return status->istatus;
}

/*
 * Get CB message queue substatus: the isubstatus field masked with
 * CBSS_MSG_QUEUE_MASK.
 */
static inline int gru_get_cb_message_queue_substatus(gru_control_block_t *cb)
{
	struct gru_control_block_status *status = (struct gru_control_block_status *)cb;

	return status->isubstatus & CBSS_MSG_QUEUE_MASK;
}

/* Get CB substatus: returns the full isubstatus field of the control block. */
static inline int gru_get_cb_substatus(gru_control_block_t *cb)
{
	struct gru_control_block_status *status = (struct gru_control_block_status *)cb;

	return status->isubstatus;
}

/*
 * User interface to check the status of an instruction. UPM and
 * exceptions are handled automatically, but this function does NOT wait
 * for an active instruction to complete.
 *
 * Returns CBS_ACTIVE while the instruction is still active; otherwise
 * returns the result of gru_check_status_proc().
 */
static inline int gru_check_status(gru_control_block_t *cb)
{
	struct gru_control_block_status *status = (struct gru_control_block_status *)cb;
	int state;

	__barrier();
	state = status->istatus;
	if (state == CBS_ACTIVE)
		return state;

	/* The proc routine must be called even when IDLE to update statistics */
	return gru_check_status_proc(cb);
}

/*
 * User interface (via inline function) to wait for an instruction to
 * complete. The completion status (IDLE or EXCEPTION) is returned to the
 * user. Exceptions due to hardware errors are automatically retried
 * before an exception is returned.
 */
static inline int gru_wait(gru_control_block_t *cb)
{
	int completion = gru_wait_proc(cb);

	return completion;
}

/*
 * Wait for a CB to complete, aborting the program on error.
 *
 * Note: "error" here does NOT mean a TLB miss - only fatal errors, such
 * as a memory parity error or a user bug, cause termination.
 */
static inline void gru_wait_abort(gru_control_block_t *cb)
{
	gru_wait_abort_proc(cb);
}

/*
 * Get a pointer to a control block
 * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
 * 	index	- index of desired CB
 *
 * Returns gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE, computed in bytes.
 */
static inline gru_control_block_t *gru_get_cb_pointer(gru_segment_t *gseg,
						      int index)
{
	/*
	 * Byte arithmetic is done on char * because arithmetic on void *
	 * is a GNU extension and not valid ISO C.
	 */
	return (gru_control_block_t *)((char *)gseg + GRU_CB_BASE +
				       index * GRU_HANDLE_STRIDE);
}

/*
 * Get a pointer to a cacheline in the data segment portion of a GSeg
 * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
 * 	index	- index of desired cache line
 *
 * Returns gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES, computed in
 * bytes.
 */
static inline void *gru_get_data_pointer(gru_segment_t *gseg, int index)
{
	/*
	 * Byte arithmetic is done on char * because arithmetic on void *
	 * is a GNU extension and not valid ISO C.
	 */
	return (char *)gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
}

/*
 * Convert a vaddr into the tri index within the GSEG
 * 	vaddr		- virtual address within the gseg
 *
 * The address is reduced to its offset within a GRU_MIN_GSEG_PAGESIZE
 * region, and GRU_DS_BASE is then subtracted from that offset.
 */
static inline int gru_get_tri(void *vaddr)
{
	unsigned long gseg_offset =
		(unsigned long)vaddr & (GRU_MIN_GSEG_PAGESIZE - 1);

	return gseg_offset - GRU_DS_BASE;
}

/*
 * Decode and print a GRU instruction.
 *	id	- NOTE(review): presumably an identifying label included in
 *		  the output - confirm
 *	ret	- NOTE(review): presumably the status value returned by a
 *		  check/wait call - confirm
 *	cb	- control block holding the instruction to decode
 */
void gru_print_cb_detail(const char *id, int ret, void *cb);