stm32f1x: use async algorithm in flash programming routine

Let the target algorithm run in the background and buffer data
continuously through a FIFO. This reduces or removes the effect of latency
because only a very small number of queue executions need to be done per
buffer fill. Previously, the many repeated target state changes, register
accesses (really inefficient) and algorithm uploads caused the flash
programming to be latency bound in many cases. Now it should scale better
with increased throughput.

Signed-off-by: Andreas Fritiofson <andreas.fritiofson@gmail.com>
This commit is contained in:
Andreas Fritiofson 2011-07-31 10:31:56 +02:00
parent a147563ac1
commit 92b14f8ca9
2 changed files with 204 additions and 83 deletions

View File

@ -1,6 +1,6 @@
/*************************************************************************** /***************************************************************************
* Copyright (C) 2010 by Spencer Oliver * * Copyright (C) 2011 by Andreas Fritiofson *
* spen@spen-soft.co.uk * * andreas.fritiofson@gmail.com *
* * * *
* This program is free software; you can redistribute it and/or modify * * This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by * * it under the terms of the GNU General Public License as published by *
@ -25,34 +25,47 @@
.thumb_func .thumb_func
.global write .global write
/* /* Params:
r0 - source address * r0 - flash base (in), status (out)
r1 - target address * r1 - count (halfword-16bit)
r2 - count (halfword-16bit) * r2 - workarea start
r3 - sector offet in : result out * r3 - workarea end
r4 - flash base * r4 - target address
*/ * Clobbered:
* r5 - rp
* r6 - wp, tmp
*/
#define STM32_FLASH_CR_OFFSET 0x10 /* offset of CR register in FLASH struct */ #define STM32_FLASH_CR_OFFSET 0x10 /* offset of CR register from flash reg base */
#define STM32_FLASH_SR_OFFSET 0x0c /* offset of CR register in FLASH struct */ #define STM32_FLASH_SR_OFFSET 0x0c /* offset of SR register from flash reg base */
write: wait_fifo:
ldr r4, STM32_FLASH_BASE ldr r6, [r2, #0] /* read wp */
add r4, r3 /* add offset 0x00 for sector 0 : 0x40 for sector 1 */ cmp r6, #0 /* abort if wp == 0 */
write_half_word: beq exit
movs r3, #0x01 ldr r5, [r2, #4] /* read rp */
str r3, [r4, #STM32_FLASH_CR_OFFSET] /* PG (bit0) == 1 => flash programming enabled */ cmp r5, r6 /* wait until rp != wp */
ldrh r3, [r0], #0x02 /* read one half-word from src, increment ptr */ beq wait_fifo
strh r3, [r1], #0x02 /* write one half-word from src, increment ptr */ movs r6, #1 /* set PG flag to enable flash programming */
str r6, [r0, #STM32_FLASH_CR_OFFSET]
ldrh r6, [r5], #2 /* "*target_address++ = *rp++" */
strh r6, [r4], #2
busy: busy:
ldr r3, [r4, #STM32_FLASH_SR_OFFSET] ldr r6, [r0, #STM32_FLASH_SR_OFFSET] /* wait until BSY flag is reset */
tst r3, #0x01 /* BSY (bit0) == 1 => operation in progress */ tst r6, #1
beq busy /* wait more... */ bne busy
tst r3, #0x14 /* PGERR (bit2) == 1 or WRPRTERR (bit4) == 1 => error */ tst r6, #0x14 /* check the error bits */
bne exit /* fail... */ bne error
subs r2, r2, #0x01 /* decrement counter */ cmp r5, r3 /* wrap rp at end of buffer */
bne write_half_word /* write next half-word if anything left */ it cs
addcs r5, r2, #8
str r5, [r2, #4] /* store rp */
subs r1, r1, #1 /* decrement halfword count */
cbz r1, exit /* loop if not done */
b wait_fifo
error:
movs r0, #0
str r0, [r2, #2] /* set rp = 0 on error */
exit: exit:
bkpt #0x00 mov r0, r6 /* return status in r0 */
bkpt #0
STM32_FLASH_BASE: .word 0x40022000 /* base address of FLASH struct */

View File

@ -5,6 +5,9 @@
* Copyright (C) 2008 by Spencer Oliver * * Copyright (C) 2008 by Spencer Oliver *
* spen@spen-soft.co.uk * * spen@spen-soft.co.uk *
* * * *
* Copyright (C) 2011 by Andreas Fritiofson *
* andreas.fritiofson@gmail.com *
*
* This program is free software; you can redistribute it and/or modify * * This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by * * it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or * * the Free Software Foundation; either version 2 of the License, or *
@ -623,34 +626,45 @@ static int stm32x_write_block(struct flash_bank *bank, uint8_t *buffer,
uint32_t buffer_size = 16384; uint32_t buffer_size = 16384;
struct working_area *source; struct working_area *source;
uint32_t address = bank->base + offset; uint32_t address = bank->base + offset;
struct reg_param reg_params[4]; struct reg_param reg_params[5];
struct armv7m_algorithm armv7m_info; struct armv7m_algorithm armv7m_info;
int retval = ERROR_OK; int retval = ERROR_OK;
/* see contib/loaders/flash/stm32x.s for src */ /* see contrib/loaders/flash/stm32f1x.S for src */
static const uint8_t stm32x_flash_write_code[] = { static const uint8_t stm32x_flash_write_code[] = {
/* #define STM32_FLASH_CR_OFFSET 0x10 */ /* #define STM32_FLASH_CR_OFFSET 0x10 */
/* #define STM32_FLASH_SR_OFFSET 0x0C */ /* #define STM32_FLASH_SR_OFFSET 0x0C */
/* write: */ /* wait_fifo: */
0x08, 0x4c, /* ldr r4, STM32_FLASH_BASE */ 0x16, 0x68, /* ldr r6, [r2, #0] */
0x1c, 0x44, /* add r4, r3 */ 0x00, 0x2e, /* cmp r6, #0 */
/* write_half_word: */ 0x1a, 0xd0, /* beq exit */
0x01, 0x23, /* movs r3, #0x01 */ 0x55, 0x68, /* ldr r5, [r2, #4] */
0x23, 0x61, /* str r3, [r4, #STM32_FLASH_CR_OFFSET] */ 0xb5, 0x42, /* cmp r5, r6 */
0x30, 0xf8, 0x02, 0x3b, /* ldrh r3, [r0], #0x02 */ 0xf9, 0xd0, /* beq wait_fifo */
0x21, 0xf8, 0x02, 0x3b, /* strh r3, [r1], #0x02 */ 0x01, 0x26, /* movs r6, #1 */
/* busy: */ 0x06, 0x61, /* str r6, [r0, #STM32_FLASH_CR_OFFSET] */
0xe3, 0x68, /* ldr r3, [r4, #STM32_FLASH_SR_OFFSET] */ 0x35, 0xf8, 0x02, 0x6b, /* ldrh r6, [r5], #2 */
0x13, 0xf0, 0x01, 0x0f, /* tst r3, #0x01 */ 0x24, 0xf8, 0x02, 0x6b, /* strh r6, [r4], #2 */
0xfb, 0xd0, /* beq busy */ /* busy: */
0x13, 0xf0, 0x14, 0x0f, /* tst r3, #0x14 */ 0xc6, 0x68, /* ldr r6, [r0, #STM32_FLASH_SR_OFFSET] */
0x01, 0xd1, /* bne exit */ 0x16, 0xf0, 0x01, 0x0f, /* tst r6, #1 */
0x01, 0x3a, /* subs r2, r2, #0x01 */ 0xfb, 0xd1, /* bne busy */
0xf0, 0xd1, /* bne write_half_word */ 0x16, 0xf0, 0x14, 0x0f, /* tst r6, #0x14 */
/* exit: */ 0x07, 0xd1, /* bne error */
0x00, 0xbe, /* bkpt #0x00 */ 0x9d, 0x42, /* cmp r5, r3 */
0x00, 0x20, 0x02, 0x40, /* STM32_FLASH_BASE: .word 0x40022000 */ 0x28, 0xbf, /* it cs */
0x02, 0xf1, 0x08, 0x05, /* addcs r5, r2, #8 */
0x55, 0x60, /* str r5, [r2, #4] */
0x01, 0x39, /* subs r1, r1, #1 */
0x19, 0xb1, /* cbz r1, exit */
0xe4, 0xe7, /* b wait_fifo */
/* error: */
0x00, 0x20, /* movs r0, #0 */
0xc2, 0xf8, 0x02, 0x00, /* str r0, [r2, #2] */
/* exit: */
0x30, 0x46, /* mov r0, r6 */
0x00, 0xbe, /* bkpt #0 */
}; };
/* flash write code */ /* flash write code */
@ -670,6 +684,7 @@ static int stm32x_write_block(struct flash_bank *bank, uint8_t *buffer,
while (target_alloc_working_area_try(target, buffer_size, &source) != ERROR_OK) while (target_alloc_working_area_try(target, buffer_size, &source) != ERROR_OK)
{ {
buffer_size /= 2; buffer_size /= 2;
buffer_size &= ~3UL; // Make sure it's 4 byte aligned
if (buffer_size <= 256) if (buffer_size <= 256)
{ {
/* if we already allocated the writing code, but failed to get a /* if we already allocated the writing code, but failed to get a
@ -682,60 +697,152 @@ static int stm32x_write_block(struct flash_bank *bank, uint8_t *buffer,
} }
}; };
/* Set up working area. First word is write pointer, second word is read pointer,
* rest is fifo data area. */
uint32_t wp_addr = source->address;
uint32_t rp_addr = source->address + 4;
uint32_t fifo_start_addr = source->address + 8;
uint32_t fifo_end_addr = source->address + source->size;
uint32_t wp = fifo_start_addr;
uint32_t rp = fifo_start_addr;
retval = target_write_u32(target, wp_addr, wp);
if (retval != ERROR_OK)
return retval;
retval = target_write_u32(target, rp_addr, rp);
if (retval != ERROR_OK)
return retval;
init_reg_param(&reg_params[0], "r0", 32, PARAM_IN_OUT); /* flash base (in), status (out) */
init_reg_param(&reg_params[1], "r1", 32, PARAM_OUT); /* count (halfword-16bit) */
init_reg_param(&reg_params[2], "r2", 32, PARAM_OUT); /* buffer start */
init_reg_param(&reg_params[3], "r3", 32, PARAM_OUT); /* buffer end */
init_reg_param(&reg_params[4], "r4", 32, PARAM_IN_OUT); /* target address */
buf_set_u32(reg_params[0].value, 0, 32, stm32x_info->register_base);
buf_set_u32(reg_params[1].value, 0, 32, count);
buf_set_u32(reg_params[2].value, 0, 32, source->address);
buf_set_u32(reg_params[3].value, 0, 32, source->address + source->size);
buf_set_u32(reg_params[4].value, 0, 32, address);
armv7m_info.common_magic = ARMV7M_COMMON_MAGIC; armv7m_info.common_magic = ARMV7M_COMMON_MAGIC;
armv7m_info.core_mode = ARMV7M_MODE_ANY; armv7m_info.core_mode = ARMV7M_MODE_ANY;
init_reg_param(&reg_params[0], "r0", 32, PARAM_OUT); /* Start up algorithm on target and let it idle while writing the first chunk */
init_reg_param(&reg_params[1], "r1", 32, PARAM_OUT); if ((retval = target_start_algorithm(target, 0, NULL, 5, reg_params,
init_reg_param(&reg_params[2], "r2", 32, PARAM_OUT); stm32x_info->write_algorithm->address,
init_reg_param(&reg_params[3], "r3", 32, PARAM_IN_OUT); 0,
&armv7m_info)) != ERROR_OK)
{
LOG_ERROR("error starting stm32x flash write algorithm");
goto cleanup;
}
while (count > 0) while (count > 0)
{ {
uint32_t thisrun_count = (count > (buffer_size / 2)) ? retval = target_read_u32(target, rp_addr, &rp);
(buffer_size / 2) : count; if (retval != ERROR_OK)
if ((retval = target_write_buffer(target, source->address,
thisrun_count * 2, buffer)) != ERROR_OK)
break;
buf_set_u32(reg_params[0].value, 0, 32, source->address);
buf_set_u32(reg_params[1].value, 0, 32, address);
buf_set_u32(reg_params[2].value, 0, 32, thisrun_count);
buf_set_u32(reg_params[3].value, 0, 32, stm32x_info->register_base - FLASH_REG_BASE_B0);
if ((retval = target_run_algorithm(target, 0, NULL, 4, reg_params,
stm32x_info->write_algorithm->address,
0,
10000, &armv7m_info)) != ERROR_OK)
{ {
LOG_ERROR("error executing stm32x flash write algorithm"); LOG_ERROR("failed to get read pointer");
break; break;
} }
if (buf_get_u32(reg_params[3].value, 0, 32) & FLASH_PGERR) LOG_DEBUG("count 0x%"PRIx32" wp 0x%"PRIx32" rp 0x%"PRIx32, count, wp, rp);
if (rp == 0)
{
LOG_ERROR("flash write algorithm aborted by target");
retval = ERROR_FLASH_OPERATION_FAILED;
break;
}
if ((rp & 1) || rp < fifo_start_addr || rp >= fifo_end_addr)
{
LOG_ERROR("corrupted fifo read pointer 0x%"PRIx32, rp);
break;
}
/* Count the number of bytes available in the fifo without
* crossing the wrap around. Make sure to not fill it completely,
* because that would make wp == rp and that's the empty condition. */
uint32_t thisrun_bytes;
if (rp > wp)
thisrun_bytes = rp - wp - 2;
else if (rp > fifo_start_addr)
thisrun_bytes = fifo_end_addr - wp;
else
thisrun_bytes = fifo_end_addr - wp - 2;
if (thisrun_bytes == 0)
{
/* Throttle polling a bit if transfer is (much) faster than flash
* programming. The exact delay shouldn't matter as long as it's
* less than buffer size / flash speed. This is very unlikely to
* run when using high latency connections such as USB. */
alive_sleep(10);
continue;
}
/* Limit to the amount of data we actually want to write */
if (thisrun_bytes > count * 2)
thisrun_bytes = count * 2;
/* Write data to fifo */
retval = target_write_buffer(target, wp, thisrun_bytes, buffer);
if (retval != ERROR_OK)
break;
/* Update counters and wrap write pointer */
buffer += thisrun_bytes;
count -= thisrun_bytes / 2;
wp += thisrun_bytes;
if (wp >= fifo_end_addr)
wp = fifo_start_addr;
/* Store updated write pointer to target */
retval = target_write_u32(target, wp_addr, wp);
if (retval != ERROR_OK)
break;
}
if (retval != ERROR_OK)
{
/* abort flash write algorithm on target */
target_write_u32(target, wp_addr, 0);
}
int retval2;
if ((retval2 = target_wait_algorithm(target, 0, NULL, 5, reg_params,
0,
10000,
&armv7m_info)) != ERROR_OK)
{
LOG_ERROR("error waiting for stm32x flash write algorithm");
retval = retval2;
}
if (retval == ERROR_FLASH_OPERATION_FAILED)
{
LOG_ERROR("flash write failed at address 0x%"PRIx32,
buf_get_u32(reg_params[4].value, 0, 32));
if (buf_get_u32(reg_params[0].value, 0, 32) & FLASH_PGERR)
{ {
LOG_ERROR("flash memory not erased before writing"); LOG_ERROR("flash memory not erased before writing");
/* Clear but report errors */ /* Clear but report errors */
target_write_u32(target, STM32_FLASH_SR_B0, FLASH_PGERR); target_write_u32(target, STM32_FLASH_SR_B0, FLASH_PGERR);
retval = ERROR_FAIL;
break;
} }
if (buf_get_u32(reg_params[3].value, 0, 32) & FLASH_WRPRTERR) if (buf_get_u32(reg_params[0].value, 0, 32) & FLASH_WRPRTERR)
{ {
LOG_ERROR("flash memory write protected"); LOG_ERROR("flash memory write protected");
/* Clear but report errors */ /* Clear but report errors */
target_write_u32(target, STM32_FLASH_SR_B0, FLASH_WRPRTERR); target_write_u32(target, STM32_FLASH_SR_B0, FLASH_WRPRTERR);
retval = ERROR_FAIL;
break;
} }
buffer += thisrun_count * 2;
address += thisrun_count * 2;
count -= thisrun_count;
} }
cleanup:
target_free_working_area(target, source); target_free_working_area(target, source);
target_free_working_area(target, stm32x_info->write_algorithm); target_free_working_area(target, stm32x_info->write_algorithm);
@ -743,6 +850,7 @@ static int stm32x_write_block(struct flash_bank *bank, uint8_t *buffer,
destroy_reg_param(&reg_params[1]); destroy_reg_param(&reg_params[1]);
destroy_reg_param(&reg_params[2]); destroy_reg_param(&reg_params[2]);
destroy_reg_param(&reg_params[3]); destroy_reg_param(&reg_params[3]);
destroy_reg_param(&reg_params[4]);
return retval; return retval;
} }