From 4b1492bb8e7d15927d3f27aac5432a7b19cef28d Mon Sep 17 00:00:00 2001 From: Tarek BOCHKATI Date: Sat, 6 Mar 2021 22:46:35 +0100 Subject: [PATCH] flash/stm32l4x: switch to to c loader instead of assembly loader switching to C loader instead of the assembly version will enhance readability will reduce the maintenance effort. besides the switch to C loader, we added a new parameters to the loader like flash_word_size and flash_sr_bsy_mask in order to support properly STM32U5x and STM32G0Bx/G0Cx in dual-bank mode. Change-Id: I24cafc2ba637a065593a0506eae787b21080a0ba Signed-off-by: Tarek BOCHKATI Reviewed-on: https://review.openocd.org/c/openocd/+/6109 Tested-by: jenkins Reviewed-by: Oleksij Rempel --- contrib/loaders/flash/stm32/Makefile | 9 +- contrib/loaders/flash/stm32/stm32l4x.S | 105 ------------- contrib/loaders/flash/stm32/stm32l4x.c | 189 +++++++++++++++++++++++ contrib/loaders/flash/stm32/stm32l4x.inc | 15 +- src/flash/nor/stm32l4x.c | 72 ++++----- src/flash/nor/stm32l4x.h | 18 +++ 6 files changed, 259 insertions(+), 149 deletions(-) delete mode 100644 contrib/loaders/flash/stm32/stm32l4x.S create mode 100644 contrib/loaders/flash/stm32/stm32l4x.c diff --git a/contrib/loaders/flash/stm32/Makefile b/contrib/loaders/flash/stm32/Makefile index b58b41284..cee282aa3 100644 --- a/contrib/loaders/flash/stm32/Makefile +++ b/contrib/loaders/flash/stm32/Makefile @@ -6,14 +6,19 @@ CC=$(CROSS_COMPILE)gcc OBJCOPY=$(CROSS_COMPILE)objcopy OBJDUMP=$(CROSS_COMPILE)objdump -CFLAGS = -static -nostartfiles -mlittle-endian -Wa,-EL + +AFLAGS = -static -nostartfiles -mlittle-endian -Wa,-EL +CFLAGS = -c -mthumb -nostdlib -nostartfiles -Os -g -fPIC all: stm32f1x.inc stm32f2x.inc stm32h7x.inc stm32l4x.inc stm32lx.inc .PHONY: clean %.elf: %.S - $(CC) $(CFLAGS) $< -o $@ + $(CC) $(AFLAGS) $< -o $@ + +stm32l4x.elf: stm32l4x.c + $(CC) $(CFLAGS) -mcpu=cortex-m0plus -fstack-usage -Wa,-adhln=$(<:.c=.lst) $< -o $@ %.lst: %.elf $(OBJDUMP) -S $< > $@ diff --git a/contrib/loaders/flash/stm32/stm32l4x.S b/contrib/loaders/flash/stm32/stm32l4x.S deleted file mode 100644 index 9923ce772..000000000 --- a/contrib/loaders/flash/stm32/stm32l4x.S +++ /dev/null @@ -1,105 +0,0 @@ -/*************************************************************************** - * Copyright (C) 2010 by Spencer Oliver * - * spen@spen-soft.co.uk * - * * - * Copyright (C) 2011 Øyvind Harboe * - * oyvind.harboe@zylin.com * - * * - * Copyright (C) 2015 Uwe Bonnes * - * bon@elektron.ikp.physik.tu-darmstadt.de * - * * - * Copyright (C) 2018 Andreas Bolsch * - * andreas.bolsch@mni.thm.de * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc. * - ***************************************************************************/ - - .text - .syntax unified - .cpu cortex-m0 - .thumb - -/* - * Params : - * r0 = workarea start, status (out) - * r1 = workarea end + 1 - * r2 = target address - * r3 = count (64bit words) - * r4 = flash status register - * r5 = flash control register - * - * Clobbered: - * r6/7 - temp (64-bit) - */ - -#include "../../../../src/flash/nor/stm32l4x.h" - - .thumb_func - .global _start - -_start: - mov r8, r3 /* copy dword count */ -wait_fifo: - ldr r6, [r0, #0] /* read wp */ - cmp r6, #0 /* if wp == 0, */ - beq exit /* then abort */ - ldr r3, [r0, #4] /* read rp */ - subs r6, r6, r3 /* number of bytes available for read in r6 */ - bpl fifo_stat /* if not wrapped around, skip */ - adds r6, r6, r1 /* add end of buffer */ - subs r6, r6, r0 /* sub start of buffer */ -fifo_stat: - cmp r6, #8 /* wait until at least one dword available */ - bcc wait_fifo - - movs r6, #FLASH_PG /* flash program enable */ - str r6, [r5] /* write to FLASH_CR, start operation */ - ldmia r3!, {r6, r7} /* read one dword from src, increment ptr */ - stmia r2!, {r6, r7} /* write one dword to dst, increment ptr */ - dsb - ldr r7, =FLASH_BSY /* FLASH_BSY mask */ -busy: - ldr r6, [r4] /* get FLASH_SR register */ - tst r6, r7 /* BSY == 1 => operation in progress */ - bne busy /* if still set, wait more ... */ - movs r7, #FLASH_ERROR /* all error bits */ - tst r6, r7 /* check for any error bit */ - bne error /* fail ... */ - - cmp r3, r1 /* rp at end of buffer? */ - bcc upd_rp /* if no, then skip */ - subs r3, r3, r1 /* sub end of buffer */ - adds r3, r3, r0 /* add start of buffer */ - adds r3, r3, #8 /* skip wp and rp */ -upd_rp: - str r3, [r0, #4] /* store rp */ - mov r7, r8 /* get dword count */ - subs r7, r7, #1 /* decrement dword count */ - mov r8, r7 /* save dword count */ - beq exit /* exit if done */ - b wait_fifo - - .pool - -error: - movs r3, #0 - str r3, [r0, #4] /* set rp = 0 on error */ -exit: - mov r0, r6 /* return status in r0 */ - movs r6, #0 /* flash program disable */ - str r6, [r5] /* write to FLASH_CR */ - movs r6, #FLASH_ERROR /* all error bits */ - str r6, [r4] /* write to FLASH_CR to clear errors */ - bkpt #0x00 diff --git a/contrib/loaders/flash/stm32/stm32l4x.c b/contrib/loaders/flash/stm32/stm32l4x.c new file mode 100644 index 000000000..bcad98843 --- /dev/null +++ b/contrib/loaders/flash/stm32/stm32l4x.c @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** + * Copyright (C) 2021 Tarek BOCHKATI + * tarek.bouchkati@st.com + */ + +#include +#include "../../../../src/flash/nor/stm32l4x.h" + +static inline __attribute__((always_inline)) +void copy_buffer_u32(uint32_t *dst, uint32_t *src, int len) +{ + for (int i = 0; i < len; i++) + dst[i] = src[i]; +} + +/* this function is assumes that fifo_size is multiple of flash_word_size + * this condition is ensured by target_run_flash_async_algorithm + */ + +void write(volatile struct stm32l4_work_area *work_area, + uint8_t *fifo_end, + uint8_t *target_address, + uint32_t count) +{ + volatile uint32_t *flash_sr = (uint32_t *) work_area->params.flash_sr_addr; + volatile uint32_t *flash_cr = (uint32_t *) work_area->params.flash_cr_addr; + + /* optimization to avoid reading from memory each time */ + uint8_t *rp_cache = work_area->fifo.rp; + + /* fifo_start is used to wrap when we reach fifo_end */ + uint8_t *fifo_start = rp_cache; + + /* enable flash programming */ + *flash_cr = FLASH_PG; + + while (count) { + /* optimization to avoid reading from memory each time */ + uint8_t *wp_cache = work_area->fifo.wp; + if (wp_cache == 0) + break; /* aborted by target_run_flash_async_algorithm */ + + int32_t fifo_size = wp_cache - rp_cache; + if (fifo_size < 0) { + /* consider the linear fifo, we will wrap later */ + fifo_size = fifo_end - rp_cache; + } + + /* wait for at least a flash word */ + while (fifo_size >= work_area->params.flash_word_size) { + copy_buffer_u32((uint32_t *)target_address, + (uint32_t *)rp_cache, + work_area->params.flash_word_size / 4); + + /* update target_address and rp_cache */ + target_address += work_area->params.flash_word_size; + rp_cache += work_area->params.flash_word_size; + + /* wait for the busy flag */ + while (*flash_sr & work_area->params.flash_sr_bsy_mask) + ; + + if (*flash_sr & FLASH_ERROR) { + work_area->fifo.rp = 0; /* set rp to zero 0 on error */ + goto write_end; + } + + /* wrap if reach the fifo_end, and update rp in memory */ + if (rp_cache >= fifo_end) + rp_cache = fifo_start; + + /* flush the rp cache value, + * so target_run_flash_async_algorithm can fill the circular fifo */ + work_area->fifo.rp = rp_cache; + + /* update fifo_size and count */ + fifo_size -= work_area->params.flash_word_size; + count--; + } + } + +write_end: + /* disable flash programming */ + *flash_cr = 0; + + /* soft break the loader */ + __asm("bkpt 0"); +} + +/* by enabling this define 'DEBUG': + * the main() function can help help debugging the loader algo + * note: the application should be linked into RAM */ + +/* #define DEBUG */ + +#ifdef DEBUG +/* device selector: STM32L5 | STM32U5 | STM32WB | STM32WL | STM32WL_CPU2 | STM32G0Bx | ... */ +#define STM32U5 + +/* when using a secure device, and want to test the secure programming enable this define */ +/* #define SECURE */ + +#if defined(STM32U5) +# define FLASH_WORD_SIZE 16 +#else +# define FLASH_WORD_SIZE 8 +#endif + +#if defined(STM32WB) || defined(STM32WL) +# define FLASH_BASE 0x58004000 +#else +# define FLASH_BASE 0x40022000 +#endif + +#if defined(STM32G0Bx) +# define FLASH_BSY_MASK (FLASH_BSY | FLASH_BSY2) +#else +# define FLASH_BSY_MASK FLASH_BSY +#endif + +#if defined(STM32L5) || defined(STM32U5) +# ifdef SECURE +# define FLASH_KEYR_OFFSET 0x0c +# define FLASH_SR_OFFSET 0x24 +# define FLASH_CR_OFFSET 0x2c +# else +# define FLASH_KEYR_OFFSET 0x08 +# define FLASH_SR_OFFSET 0x20 +# define FLASH_CR_OFFSET 0x28 +# endif +#elif defined(STM32WL_CPU2) +# define FLASH_KEYR_OFFSET 0x08 +# define FLASH_SR_OFFSET 0x60 +# define FLASH_CR_OFFSET 0x64 +#else +# define FLASH_KEYR_OFFSET 0x08 +# define FLASH_SR_OFFSET 0x10 +# define FLASH_CR_OFFSET 0x14 +#endif + +#define FLASH_KEYR (uint32_t *)((FLASH_BASE) + (FLASH_KEYR_OFFSET)) +#define FLASH_SR (uint32_t *)((FLASH_BASE) + (FLASH_SR_OFFSET)) +#define FLASH_CR (uint32_t *)((FLASH_BASE) + (FLASH_CR_OFFSET)) + +int main() +{ + const uint32_t count = 2; + const uint32_t buf_size = count * FLASH_WORD_SIZE; + const uint32_t work_area_size = sizeof(struct stm32l4_work_area) + buf_size; + + uint8_t work_area_buf[work_area_size]; + struct stm32l4_work_area *workarea = (struct stm32l4_work_area *)work_area_buf; + + /* fill the workarea struct */ + workarea->params.flash_sr_addr = (uint32_t)(FLASH_SR); + workarea->params.flash_cr_addr = (uint32_t)(FLASH_CR); + workarea->params.flash_word_size = FLASH_WORD_SIZE; + workarea->params.flash_sr_bsy_mask = FLASH_BSY_MASK; + /* note: the workarea->stack is not used, in this configuration */ + + /* programming the existing memory raw content in workarea->fifo.buf */ + /* feel free to fill the memory with magical values ... */ + + workarea->fifo.wp = (uint8_t *)(&workarea->fifo.buf + buf_size); + workarea->fifo.rp = (uint8_t *)&workarea->fifo.buf; + + /* unlock the flash */ + *FLASH_KEYR = KEY1; + *FLASH_KEYR = KEY2; + + /* erase sector 0 */ + *FLASH_CR = FLASH_PER | FLASH_STRT; + while (*FLASH_SR & FLASH_BSY) + ; + + /* flash address, should be aligned to FLASH_WORD_SIZE */ + uint8_t *target_address = (uint8_t *) 0x8000000; + + write(workarea, + (uint8_t *)(workarea + work_area_size), + target_address, + count); + + while (1) + ; +} +#endif /* DEBUG */ diff --git a/contrib/loaders/flash/stm32/stm32l4x.inc b/contrib/loaders/flash/stm32/stm32l4x.inc index df5c7edd1..1ab400a0a 100644 --- a/contrib/loaders/flash/stm32/stm32l4x.inc +++ b/contrib/loaders/flash/stm32/stm32l4x.inc @@ -1,7 +1,10 @@ /* Autogenerated with ../../../../src/helper/bin2char.sh */ -0x98,0x46,0x06,0x68,0x00,0x2e,0x23,0xd0,0x43,0x68,0xf6,0x1a,0x01,0xd5,0x76,0x18, -0x36,0x1a,0x08,0x2e,0xf5,0xd3,0x01,0x26,0x2e,0x60,0xc0,0xcb,0xc0,0xc2,0xbf,0xf3, -0x4f,0x8f,0x09,0x4f,0x26,0x68,0x3e,0x42,0xfc,0xd1,0xfa,0x27,0x3e,0x42,0x0d,0xd1, -0x8b,0x42,0x02,0xd3,0x5b,0x1a,0x1b,0x18,0x08,0x33,0x43,0x60,0x47,0x46,0x01,0x3f, -0xb8,0x46,0x05,0xd0,0xdd,0xe7,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x23,0x43,0x60, -0x30,0x46,0x00,0x26,0x2e,0x60,0xfa,0x26,0x26,0x60,0x00,0xbe, +0xf0,0xb5,0x87,0xb0,0x07,0x68,0x01,0x93,0x43,0x68,0x04,0x91,0x02,0x93,0x83,0x6f, +0x02,0x99,0x03,0x93,0x01,0x23,0x0b,0x60,0x03,0x9b,0x01,0x99,0x00,0x29,0x1f,0xd0, +0x41,0x6f,0x00,0x29,0x1c,0xd0,0xc9,0x1a,0x01,0xd5,0x04,0x99,0xc9,0x1a,0x84,0x68, +0x8c,0x42,0xf2,0xd8,0x85,0x68,0xac,0x08,0x05,0x94,0x00,0x24,0x05,0x9d,0xa5,0x42, +0x14,0xdc,0x84,0x68,0x12,0x19,0x84,0x68,0x1b,0x19,0x3c,0x68,0xc5,0x68,0x2e,0x00, +0x26,0x40,0x25,0x42,0xf9,0xd1,0xfa,0x25,0x3c,0x68,0x2c,0x42,0x0b,0xd0,0x86,0x67, +0x00,0x23,0x02,0x9a,0x13,0x60,0x00,0xbe,0x07,0xb0,0xf0,0xbd,0xa6,0x00,0x9d,0x59, +0x01,0x34,0x95,0x51,0xe2,0xe7,0x04,0x9c,0x9c,0x42,0x00,0xd8,0x03,0x9b,0x83,0x67, +0x84,0x68,0x09,0x1b,0x01,0x9c,0x01,0x3c,0x01,0x94,0xd0,0xe7, diff --git a/src/flash/nor/stm32l4x.c b/src/flash/nor/stm32l4x.c index 3c055616f..ef15013ea 100644 --- a/src/flash/nor/stm32l4x.c +++ b/src/flash/nor/stm32l4x.c @@ -1319,11 +1319,10 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer, { struct target *target = bank->target; struct stm32l4_flash_bank *stm32l4_info = bank->driver_priv; - uint32_t buffer_size; struct working_area *write_algorithm; struct working_area *source; uint32_t address = bank->base + offset; - struct reg_param reg_params[6]; + struct reg_param reg_params[5]; struct armv7m_algorithm armv7m_info; int retval = ERROR_OK; @@ -1345,12 +1344,13 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer, return retval; } - /* memory buffer, size *must* be multiple of stm32l4_info->data_width - * plus one dword for rp and one for wp */ - /* FIXME, currently only STM32U5 devices do have a different data_width, - * but STM32U5 device flash programming does not go through this function - * so temporarily continue to consider the default data_width = 8 */ - buffer_size = target_get_working_area_avail(target) & ~(2 * sizeof(uint32_t) - 1); + /* data_width should be multiple of double-word */ + assert(stm32l4_info->data_width % 8 == 0); + const size_t extra_size = sizeof(struct stm32l4_work_area); + uint32_t buffer_size = target_get_working_area_avail(target) - extra_size; + /* buffer_size should be multiple of stm32l4_info->data_width */ + buffer_size &= ~(stm32l4_info->data_width - 1); + if (buffer_size < 256) { LOG_WARNING("large enough working area not available, can't do block memory writes"); target_free_working_area(target, write_algorithm); @@ -1360,7 +1360,7 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer, buffer_size = 16384; } - if (target_alloc_working_area_try(target, buffer_size, &source) != ERROR_OK) { + if (target_alloc_working_area_try(target, buffer_size + extra_size, &source) != ERROR_OK) { LOG_ERROR("allocating working area failed"); return ERROR_TARGET_RESOURCE_NOT_AVAILABLE; } @@ -1371,28 +1371,46 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer, init_reg_param(®_params[0], "r0", 32, PARAM_IN_OUT); /* buffer start, status (out) */ init_reg_param(®_params[1], "r1", 32, PARAM_OUT); /* buffer end */ init_reg_param(®_params[2], "r2", 32, PARAM_OUT); /* target address */ - init_reg_param(®_params[3], "r3", 32, PARAM_OUT); /* count (double word-64bit) */ - init_reg_param(®_params[4], "r4", 32, PARAM_OUT); /* flash status register */ - init_reg_param(®_params[5], "r5", 32, PARAM_OUT); /* flash control register */ + init_reg_param(®_params[3], "r3", 32, PARAM_OUT); /* count (of stm32l4_info->data_width) */ + init_reg_param(®_params[4], "sp", 32, PARAM_OUT); /* write algo stack pointer */ buf_set_u32(reg_params[0].value, 0, 32, source->address); buf_set_u32(reg_params[1].value, 0, 32, source->address + source->size); buf_set_u32(reg_params[2].value, 0, 32, address); buf_set_u32(reg_params[3].value, 0, 32, count); - buf_set_u32(reg_params[4].value, 0, 32, stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_SR_INDEX)); - buf_set_u32(reg_params[5].value, 0, 32, stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_CR_INDEX)); + buf_set_u32(reg_params[4].value, 0, 32, source->address + + offsetof(struct stm32l4_work_area, stack) + LDR_STACK_SIZE); + + struct stm32l4_loader_params loader_extra_params; + + target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_sr_addr, + stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_SR_INDEX)); + target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_cr_addr, + stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_CR_INDEX)); + target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_word_size, + stm32l4_info->data_width); + target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_sr_bsy_mask, + stm32l4_info->sr_bsy_mask); + + retval = target_write_buffer(target, source->address, sizeof(loader_extra_params), + (uint8_t *) &loader_extra_params); + if (retval != ERROR_OK) + return retval; retval = target_run_flash_async_algorithm(target, buffer, count, stm32l4_info->data_width, 0, NULL, ARRAY_SIZE(reg_params), reg_params, - source->address, source->size, + source->address + offsetof(struct stm32l4_work_area, fifo), + source->size - offsetof(struct stm32l4_work_area, fifo), write_algorithm->address, 0, &armv7m_info); if (retval == ERROR_FLASH_OPERATION_FAILED) { LOG_ERROR("error executing stm32l4 flash write algorithm"); - uint32_t error = buf_get_u32(reg_params[0].value, 0, 32) & FLASH_ERROR; + uint32_t error; + stm32l4_read_flash_reg_by_index(bank, STM32_FLASH_SR_INDEX, &error); + error &= FLASH_ERROR; if (error & FLASH_WRPERR) LOG_ERROR("flash memory write protected"); @@ -1413,7 +1431,6 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer, destroy_reg_param(®_params[2]); destroy_reg_param(®_params[3]); destroy_reg_param(®_params[4]); - destroy_reg_param(®_params[5]); return retval; } @@ -1538,24 +1555,7 @@ static int stm32l4_write(struct flash_bank *bank, const uint8_t *buffer, if (retval != ERROR_OK) goto err_lock; - /** - * FIXME update the flash loader to use a custom FLASH_SR_BSY mask - * Workaround for STM32G0Bx/G0Cx devices in dual bank mode, - * as the flash loader does not use the SR_BSY2 - */ - bool use_flashloader = stm32l4_info->use_flashloader; - if ((stm32l4_info->part_info->id == 0x467) && stm32l4_info->dual_bank_mode) { - LOG_INFO("Couldn't use the flash loader in dual-bank mode"); - use_flashloader = false; - } else if (stm32l4_info->part_info->id == 0x482) { - /** - * FIXME the current flashloader does not support writing in quad-words - * which is required for STM32U5 devices. - */ - use_flashloader = false; - } - - if (use_flashloader) { + if (stm32l4_info->use_flashloader) { /* For TrustZone enabled devices, when TZEN is set and RDP level is 0.5, * the debug is possible only in non-secure state. * Thus means the flashloader will run in non-secure mode, @@ -1567,7 +1567,7 @@ static int stm32l4_write(struct flash_bank *bank, const uint8_t *buffer, count / stm32l4_info->data_width); } - if (!use_flashloader || retval == ERROR_TARGET_RESOURCE_NOT_AVAILABLE) { + if (!stm32l4_info->use_flashloader || retval == ERROR_TARGET_RESOURCE_NOT_AVAILABLE) { LOG_INFO("falling back to single memory accesses"); retval = stm32l4_write_block_without_loader(bank, buffer, offset, count / stm32l4_info->data_width); diff --git a/src/flash/nor/stm32l4x.h b/src/flash/nor/stm32l4x.h index 7b9162b08..2d19cffff 100644 --- a/src/flash/nor/stm32l4x.h +++ b/src/flash/nor/stm32l4x.h @@ -92,4 +92,22 @@ #define STM32L5_REGS_SEC_OFFSET 0x10000000 +/* 100 bytes as loader stack should be large enough for the loader to operate */ +#define LDR_STACK_SIZE 100 + +struct stm32l4_work_area { + struct stm32l4_loader_params { + uint32_t flash_sr_addr; + uint32_t flash_cr_addr; + uint32_t flash_word_size; + uint32_t flash_sr_bsy_mask; + } params; + uint8_t stack[LDR_STACK_SIZE]; + struct flash_async_algorithm_circbuf { + uint8_t *wp; + uint8_t *rp; + uint8_t *buf; + } fifo; +}; + #endif