arm11: improve performance using minidriver hook

zy1000 performance for GDB load went from 100 kBytes/s
to 300 kBytes/s @ 8 MHz by implementing the inner loop
of unacknowledged ("noack") arm11 memory writes directly
on top of the hardware FIFO.

Profiling info:

 78.57      0.77     0.77                             arm11_run_instr_data_to_core_noack_inner
  5.10      0.82     0.05                             memcpy
  4.08      0.86     0.04                             jtag_tap_next_enabled
  3.06      0.89     0.03                             gdb_input

Signed-off-by: Øyvind Harboe <oyvind.harboe@zylin.com>
This commit is contained in:
Øyvind Harboe 2010-02-09 14:26:57 +01:00
parent 1f5883ea56
commit faef631a4d

View File

@ -845,11 +845,73 @@ void embeddedice_write_dcc(struct jtag_tap *tap, int reg_addr, uint8_t *buffer,
}
int arm11_run_instr_data_to_core_noack_inner_default(struct arm11_common * arm11, uint32_t opcode, uint32_t * data, size_t count);
int arm11_run_instr_data_to_core_noack_inner(struct arm11_common * arm11, uint32_t opcode, uint32_t * data, size_t count)
int arm11_run_instr_data_to_core_noack_inner(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count)
{
return arm11_run_instr_data_to_core_noack_inner_default(arm11, opcode, data, count);
#if 0
int arm11_run_instr_data_to_core_noack_inner_default(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count);
return arm11_run_instr_data_to_core_noack_inner_default(tap, opcode, data, count);
#else
static const int bits[] = {32, 2};
uint32_t values[] = {0, 0};
/* FIX!!!!!! the target_write_memory() API started this nasty problem
* with unaligned uint32_t * pointers... */
const uint8_t *t = (const uint8_t *)data;
while (count--)
{
values[0] = *t++;
values[0] |= (*t++<<8);
values[0] |= (*t++<<16);
values[0] |= (*t++<<24);
if (count > 0)
{
jtag_add_dr_out(tap,
2,
bits,
values,
TAP_DRPAUSE);
#if 1
/* copy & paste from arm11_dbgtap.c */
//TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
waitIdle();
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
waitIdle();
ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, TAP_DRSHIFT);
#else
static const tap_state_t arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay[] =
{
TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
};
jtag_add_pathmove(ARRAY_SIZE(arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay),
arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay);
#endif
} else
{
/* This will happen on the last iteration updating the current tap state
* so we don't have to track it during the common code path */
jtag_add_dr_out(tap,
2,
bits,
values,
TAP_IDLE);
}
}
return jtag_execute_queue();
#endif
}