mirror of
https://github.com/google/pebble.git
synced 2025-07-05 06:10:27 -04:00
448 lines
15 KiB
C
448 lines
15 KiB
C
/*
|
|
* Copyright 2024 Google LLC
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "drivers/qspi.h"
|
|
#include "drivers/qspi_definitions.h"
|
|
|
|
#include "board/board.h"
|
|
#include "drivers/dma.h"
|
|
#include "drivers/flash/flash_impl.h"
|
|
#include "drivers/gpio.h"
|
|
#include "drivers/periph_config.h"
|
|
#include "kernel/util/delay.h"
|
|
#include "kernel/util/stop.h"
|
|
#include "mcu/cache.h"
|
|
#include "system/logging.h"
|
|
#include "system/passert.h"
|
|
#include "util/math.h"
|
|
|
|
#define STM32F4_COMPATIBLE
|
|
#define STM32F7_COMPATIBLE
|
|
#include <mcu.h>
|
|
|
|
#include "FreeRTOS.h"
|
|
#include "semphr.h"
|
|
|
|
//! Sentinel passed as the address when a command has no address phase
#define QSPI_ADDR_NO_ADDR (UINT32_MAX)
//! Size in bytes of the word used for DMA reads
#define QSPI_DMA_READ_WORD_SIZE (4)
|
|
|
|
|
|
//! Initializes the QSPI peripheral for the given port.
//!
//! Creates the semaphore used to wait for DMA reads, initializes the DMA request and the
//! CS / CLK / data pins, then programs the clock prescaler and flash size into the peripheral.
//! The peripheral clock is only held enabled for the duration of this function; callers must
//! use qspi_use() / qspi_release() around later transactions.
//!
//! @param dev the QSPI port to initialize
//! @param flash_size size of the attached flash in bytes; must be a power of two
void qspi_init(QSPIPort *dev, uint32_t flash_size) {
  // Init the DMA semaphore, used for read. Fail fast if the allocation failed rather than
  // crashing later on the first DMA read.
  dev->state->dma_semaphore = xSemaphoreCreateBinary();
  PBL_ASSERTN(dev->state->dma_semaphore);
  dma_request_init(dev->dma);

  // init GPIOs
  gpio_af_init(&dev->cs_gpio, GPIO_OType_PP, GPIO_Speed_100MHz, GPIO_PuPd_NOPULL);
  gpio_af_init(&dev->clk_gpio, GPIO_OType_PP, GPIO_Speed_100MHz, GPIO_PuPd_NOPULL);
  for (int i = 0; i < QSPI_NUM_DATA_PINS; i++) {
    gpio_af_init(&dev->data_gpio[i], GPIO_OType_PP, GPIO_Speed_100MHz, GPIO_PuPd_NOPULL);
  }

  // calculate the prescaler
  const uint32_t target_hz = dev->clock_speed_hz;
  PBL_ASSERTN(target_hz > 0);
  RCC_ClocksTypeDef RCC_ClocksStatus;
  RCC_GetClocksFreq(&RCC_ClocksStatus);
  const uint32_t hclk_hz = RCC_ClocksStatus.HCLK_Frequency;
  // Ceiling division: whenever HCLK is not an exact multiple of the desired frequency we round
  // the divider up so that the clock is never faster than the desired frequency. This also
  // yields a divider of 1 (instead of 0) when the desired frequency is above HCLK.
  uint32_t prescaler = hclk_hz / target_hz;
  if ((hclk_hz % target_hz) != 0) {
    prescaler++;
  }

  // enable clock while we initialize QSPI
  qspi_use(dev);

  // The flash size must be a power of 2; QSPI_FSize is log2(size) - 1
  PBL_ASSERTN(flash_size == (uint32_t)1 << ceil_log_two(flash_size));
  uint32_t fsize_value = ceil_log_two(flash_size) - 1;

  // Init QSPI peripheral
  QSPI_InitTypeDef qspi_config;
  QSPI_StructInit(&qspi_config);
  qspi_config.QSPI_SShift = QSPI_SShift_HalfCycleShift;
  // QSPI clock = AHB / (1 + QSPI_Prescaler)
  qspi_config.QSPI_Prescaler = prescaler - 1;
  qspi_config.QSPI_CKMode = QSPI_CKMode_Mode0;
  qspi_config.QSPI_CSHTime = QSPI_CSHTime_1Cycle;
  qspi_config.QSPI_FSize = fsize_value;
  qspi_config.QSPI_FSelect = QSPI_FSelect_1;
  qspi_config.QSPI_DFlash = QSPI_DFlash_Disable;
  QSPI_Init(&qspi_config);
  QSPI_Cmd(ENABLE);

  qspi_release(dev);
}
|
|
|
|
//! Takes a reference on the QSPI peripheral, enabling its clock for the first user.
//! Every call must be balanced by a call to qspi_release().
void qspi_use(QSPIPort *dev) {
  const bool is_first_user = (dev->state->use_count == 0);
  dev->state->use_count++;
  if (is_first_user) {
    periph_config_enable(QUADSPI, dev->clock_ctrl);
  }
}
|
|
|
|
//! Drops a reference taken with qspi_use(), disabling the peripheral clock once the last
//! user is gone. Asserts if there is no outstanding reference.
void qspi_release(QSPIPort *dev) {
  PBL_ASSERTN(dev->state->use_count > 0);
  dev->state->use_count--;
  if (dev->state->use_count == 0) {
    periph_config_disable(QUADSPI, dev->clock_ctrl);
  }
}
|
|
|
|
//! Programs the number of data bytes for the next transaction.
//! @param length number of bytes to transfer; must be non-zero
static void prv_set_num_data_bytes(uint32_t length) {
  PBL_ASSERTN(length > 0);

  // From the docs: QSPI_DataLength: Number of data to be retrieved, value+1.
  // A register value of 0 therefore means 1 byte, so we subtract 1 from the length.
  // (A register value of -1 means "read the entire flash length".)
  const uint32_t data_length_reg = length - 1;
  QSPI_SetDataLength(data_length_reg);
}
|
|
|
|
#if DEBUG_QSPI_WAITS
|
|
//! Number of flag polls the debug wait helpers allow before giving up and asserting
#define QSPI_WAIT_TIME (100000)
|
|
|
|
// These are a bit dangerous on long erase commands, but they can also be very useful to find out
|
|
// why the QSPI driver is locking up when doing development
|
|
|
|
static void prv_wait_for_transfer_complete(void) {
|
|
int i = 0;
|
|
while (QSPI_GetFlagStatus(QSPI_FLAG_TC) == RESET) {
|
|
if (++i > QSPI_WAIT_TIME) {
|
|
break;
|
|
}
|
|
}
|
|
PBL_ASSERT(i < QSPI_WAIT_TIME, "Waited too long for the QSPI transfer to complete");
|
|
}
|
|
|
|
static void prv_wait_for_not_busy(void) {
|
|
int i = 0;
|
|
while (QSPI_GetFlagStatus(QSPI_FLAG_BUSY) != RESET) {
|
|
if (++i > QSPI_WAIT_TIME) {
|
|
break;
|
|
}
|
|
}
|
|
PBL_ASSERT(i < QSPI_WAIT_TIME, "Waited too long for the QSPI to become not busy");
|
|
}
|
|
|
|
#else
|
|
|
|
static void prv_wait_for_transfer_complete(void) {
|
|
while (QSPI_GetFlagStatus(QSPI_FLAG_TC) == RESET) { }
|
|
}
|
|
|
|
static void prv_wait_for_not_busy(void) {
|
|
while (QSPI_GetFlagStatus(QSPI_FLAG_BUSY) != RESET) { }
|
|
}
|
|
|
|
#endif
|
|
|
|
//! Reads buffer_size bytes, one at a time, from the QSPI data register into buffer.
static void prv_read_bytes(uint8_t *buffer, size_t buffer_size) {
  uint8_t *dst = buffer;
  for (size_t remaining = buffer_size; remaining > 0; --remaining) {
    *dst++ = QSPI_ReceiveData8();
  }
}
|
|
|
|
static void prv_set_ddr_enabled(bool enabled) {
|
|
PBL_ASSERTN(!QSPI_GetFlagStatus(QSPI_FLAG_BUSY));
|
|
if (enabled) {
|
|
QUADSPI->CR &= ~QUADSPI_CR_SSHIFT;
|
|
} else {
|
|
QUADSPI->CR |= QUADSPI_CR_SSHIFT;
|
|
}
|
|
}
|
|
|
|
// CCR register Bits from LSB to MSB
|
|
// INSTRUCTION[7:0]: Instruction
|
|
// IMODE[1:0]: Instruction Mode
|
|
// ADMODE[1:0]: Address Mode
|
|
// ADSIZE[1:0]: Address Size
|
|
// ABMODE[1:0]: Alternate Bytes Mode
|
|
// ABSIZE[1:0]: Alternate Bytes Size
|
|
// DCYC[4:0]: Dummy Cycles
|
|
// RESERVED
|
|
// DMODE[1:0]: Data Mode
|
|
// FMODE[1:0]: Functional Mode
|
|
// SIOO: Send Instruction Only Once Mode
|
|
// RESERVED
|
|
// DHHC: Delay Half Hclk Cycle
|
|
// DDRM: Double Data Rate Mode
|
|
|
|
//! AND-ing CCR with this mask zeroes every defined field (listed below from MSB to LSB)
//! while leaving the reserved bits untouched
#define QSPI_CCR_CLEAR_MASK (~(QUADSPI_CCR_DDRM | \
                               QUADSPI_CCR_DHHC | \
                               QUADSPI_CCR_SIOO | \
                               QUADSPI_CCR_FMODE | \
                               QUADSPI_CCR_DMODE | \
                               QUADSPI_CCR_DCYC | \
                               QUADSPI_CCR_ABSIZE | \
                               QUADSPI_CCR_ABMODE | \
                               QUADSPI_CCR_ADSIZE | \
                               QUADSPI_CCR_ADMODE | \
                               QUADSPI_CCR_IMODE | \
                               QUADSPI_CCR_INSTRUCTION))
|
|
|
|
static void prv_set_comm_config(uint32_t modes_bitset, uint32_t dummy_cycles) {
|
|
uint32_t ccr = QUADSPI->CCR;
|
|
ccr &= QSPI_CCR_CLEAR_MASK;
|
|
ccr |= modes_bitset;
|
|
ccr |= (dummy_cycles << 18);
|
|
QUADSPI->CCR = ccr;
|
|
}
|
|
|
|
//! DMA completion callback passed to dma_request_start_direct() by qspi_indirect_read_dma().
//! Turns QSPI DMA requests back off and gives the semaphore the reading task is blocked on.
//! @param request unused
//! @param context the QSPIPort the transfer was started on
//! @return true if giving the semaphore woke a higher priority task
static bool prv_dma_irq_handler(DMARequest *request, void *context) {
  QSPIPort *const port = context;

  QSPI_DMACmd(DISABLE);

  signed portBASE_TYPE should_context_switch = pdFALSE;
  xSemaphoreGiveFromISR(port->state->dma_semaphore, &should_context_switch);
  if (should_context_switch == pdFALSE) {
    return false;
  }
  return true;
}
|
|
|
|
//! Configures the peripheral for a 4-line indirect read.
//! The data length must already have been programmed by the caller.
//! @param instruction the flash instruction byte
//! @param addr 24-bit flash address, or QSPI_ADDR_NO_ADDR to skip the address phase
//! @param dummy_cycles number of dummy cycles between the address and data phases
//! @param is_ddr whether the read uses double data rate
static void prv_config_indirect_read(QSPIPort *dev, uint8_t instruction, uint32_t addr,
                                     uint8_t dummy_cycles, bool is_ddr) {
  const bool has_addr = (addr != QSPI_ADDR_NO_ADDR);

  prv_set_ddr_enabled(is_ddr);

  uint32_t ccr_modes = instruction |
                       QSPI_ComConfig_FMode_Indirect_Read |
                       QSPI_ComConfig_DMode_4Line |
                       QSPI_ComConfig_IMode_4Line;
  if (is_ddr) {
    ccr_modes |= QSPI_ComConfig_DDRMode_Enable | QSPI_ComConfig_DHHC_Enable;
  } else {
    ccr_modes |= QSPI_ComConfig_DDRMode_Disable | QSPI_ComConfig_DHHC_Disable;
  }
  if (has_addr) {
    ccr_modes |= QSPI_ComConfig_ADMode_4Line | QSPI_ComConfig_ADSize_24bit;
  }
  prv_set_comm_config(ccr_modes, dummy_cycles);

  // The address is written only after the communication configuration is in place
  if (has_addr) {
    QSPI_SetAddress(addr);
  }
}
|
|
|
|
//! Performs a blocking, CPU-driven indirect read of length bytes into buffer.
//! @param addr flash address, or QSPI_ADDR_NO_ADDR for commands without an address phase
//! @param length number of bytes to read; must be non-zero
static void prv_indirect_read(QSPIPort *dev, uint8_t instruction, uint32_t addr,
                              uint8_t dummy_cycles, void *buffer, uint32_t length, bool is_ddr) {
  // The data length has to be programmed before the command is configured
  prv_set_num_data_bytes(length);
  prv_config_indirect_read(dev, instruction, addr, dummy_cycles, is_ddr);

  // Drain all of the data from the peripheral
  uint8_t *const dst = buffer;
  prv_read_bytes(dst, length);

  // Finish the transaction
  QSPI_ClearFlag(QSPI_FLAG_TC);
  prv_wait_for_not_busy();
}
|
|
|
|
//! Blocking indirect read for an instruction which has no address phase.
//! @param buffer destination for the data
//! @param length number of bytes to read; must be non-zero
void qspi_indirect_read_no_addr(QSPIPort *dev, uint8_t instruction, uint8_t dummy_cycles,
                                void *buffer, uint32_t length, bool is_ddr) {
  const uint32_t no_addr = QSPI_ADDR_NO_ADDR;
  prv_indirect_read(dev, instruction, no_addr, dummy_cycles, buffer, length, is_ddr);
}
|
|
//! Blocking indirect read of length bytes starting at flash address addr.
//! @param buffer destination for the data
//! @param length number of bytes to read; must be non-zero
void qspi_indirect_read(QSPIPort *dev, uint8_t instruction, uint32_t addr, uint8_t dummy_cycles,
                        void *buffer, uint32_t length, bool is_ddr) {
  // All of the work is shared with the no-address variant
  prv_indirect_read(dev, instruction, addr, dummy_cycles, buffer, length, is_ddr);
}
|
|
|
|
//! Indirect read which uses DMA for the aligned middle part of the destination buffer.
//! Blocks the calling task on the DMA semaphore until the DMA portion has completed.
//! @param start_addr flash address to start reading from
//! @param buffer destination for the data
//! @param length number of bytes to read; must be non-zero
void qspi_indirect_read_dma(QSPIPort *dev, uint8_t instruction, uint32_t start_addr,
                            uint8_t dummy_cycles, void *buffer, uint32_t length, bool is_ddr) {
  // DMA reads are most efficient when doing 32bits at a time. The QSPI bus runs at 100Mhz
  // and we need to be efficient in handling the data to use it to its full capability.
  //
  // So this function is broken into 3 parts:
  // 1. Do reads 1 byte at a time until buffer_ptr is aligned
  // 2. Do 32-bit DMA transfers for as much as possible
  // 3. Do reads 1 byte at a time to deal with non-aligned accesses at the end

  const uint32_t word_mask = dcache_alignment_mask_minimum(QSPI_DMA_READ_WORD_SIZE);
  const uintptr_t buffer_address = (uintptr_t)buffer;

  // End of the buffer, rounded down to the alignment boundary
  const uintptr_t last_address = buffer_address + length;
  const uintptr_t last_address_aligned = last_address & (~word_mask);

  // Start of the buffer, rounded up to the alignment boundary
  const uintptr_t start_address_aligned = ((buffer_address + word_mask) & (~word_mask));

  uint32_t leading_bytes = start_address_aligned - buffer_address;
  uint32_t trailing_bytes = last_address - last_address_aligned;

  uint32_t dma_size = last_address_aligned - start_address_aligned;

  if (last_address_aligned < start_address_aligned) {
    // The buffer doesn't contain a single aligned chunk, so read all of it byte by byte
    dma_size = 0;
    leading_bytes = length;
    trailing_bytes = 0;
  }

  prv_set_num_data_bytes(length);

  prv_config_indirect_read(dev, instruction, start_addr, dummy_cycles, is_ddr);

  if (leading_bytes > 0) {
    prv_read_bytes(buffer, leading_bytes);
  }

  if (dma_size > 0) {
    // Do 4 bytes at a time for DMA
    QSPI_SetFIFOThreshold(QSPI_DMA_READ_WORD_SIZE);

    QSPI_DMACmd(ENABLE);
    // Keep the MCU out of stop mode for as long as the DMA transfer is in flight
    stop_mode_disable(InhibitorFlash);
    dma_request_start_direct(dev->dma, (void *)start_address_aligned, (void *)&QUADSPI->DR,
                             dma_size, prv_dma_irq_handler, (void *)dev);

    // prv_dma_irq_handler() gives the semaphore once the DMA transfer is done
    xSemaphoreTake(dev->state->dma_semaphore, portMAX_DELAY);
    stop_mode_enable(InhibitorFlash);
  }

  if (trailing_bytes > 0) {
    prv_read_bytes((void *)last_address_aligned, trailing_bytes);
  }

  // Finish the transaction the same way the non-DMA read does, so that no stale transfer
  // complete flag or busy peripheral is left behind for the next command.
  QSPI_ClearFlag(QSPI_FLAG_TC);
  prv_wait_for_not_busy();
}
|
|
|
|
//! Performs a blocking, CPU-driven, 4-line indirect write.
//! @param addr 24-bit flash address, or QSPI_ADDR_NO_ADDR to skip the address phase
//! @param buffer data to send; must be NULL if and only if length is 0
//! @param length number of data bytes to send, may be 0 for instruction-only commands
static void prv_indirect_write(QSPIPort *dev, uint8_t instruction, uint32_t addr,
                               const void *buffer, uint32_t length) {
  const bool has_data = (length != 0);
  const bool has_addr = (addr != QSPI_ADDR_NO_ADDR);

  if (has_data) {
    PBL_ASSERTN(buffer);
    prv_set_num_data_bytes(length);
  } else {
    PBL_ASSERTN(!buffer);
  }

  // Writes never use DDR
  prv_set_ddr_enabled(false);

  uint32_t ccr_modes = instruction |
                       QSPI_ComConfig_FMode_Indirect_Write |
                       QSPI_ComConfig_IMode_4Line;
  if (has_addr) {
    ccr_modes |= QSPI_ComConfig_ADMode_4Line | QSPI_ComConfig_ADSize_24bit;
  }
  if (has_data) {
    ccr_modes |= QSPI_ComConfig_DMode_4Line;
  }
  prv_set_comm_config(ccr_modes, 0);

  if (has_addr) {
    QSPI_SetAddress(addr);
  }

  const uint8_t *src = buffer;
  for (uint32_t remaining = length; remaining > 0; --remaining) {
    // Note: this will stall the CPU when the FIFO gets full while data is being sent.
    // For performance reasons, we should replace it with DMA in the future
    // PBL-28805
    QSPI_SendData8(*src++);
  }

  // Finish the transaction
  prv_wait_for_transfer_complete();
  QSPI_ClearFlag(QSPI_FLAG_TC);
  prv_wait_for_not_busy();
}
|
|
|
|
//! Blocking indirect write for an instruction which has no address phase.
//! @param buffer data to send; must be NULL if and only if length is 0
//! @param length number of data bytes to send
void qspi_indirect_write_no_addr(QSPIPort *dev, uint8_t instruction, const void *buffer,
                                 uint32_t length) {
  const uint32_t no_addr = QSPI_ADDR_NO_ADDR;
  prv_indirect_write(dev, instruction, no_addr, buffer, length);
}
|
|
|
|
//! Blocking indirect write of length bytes to flash address addr.
//! @param buffer data to send; must be NULL if and only if length is 0
//! @param length number of data bytes to send
void qspi_indirect_write(QSPIPort *dev, uint8_t instruction, uint32_t addr, const void *buffer,
                         uint32_t length) {
  // All of the work is shared with the no-address variant
  prv_indirect_write(dev, instruction, addr, buffer, length);
}
|
|
|
|
//! Sends a bare instruction (no address, no data) with the instruction on a single line
//! instead of four, and blocks until the transaction has finished.
void qspi_indirect_write_no_addr_1line(QSPIPort *dev, uint8_t instruction) {
  prv_set_ddr_enabled(false);

  const uint32_t ccr_modes = instruction |
                             QSPI_ComConfig_FMode_Indirect_Write |
                             QSPI_ComConfig_IMode_1Line;
  prv_set_comm_config(ccr_modes, 0);

  // Finish the transaction
  prv_wait_for_transfer_complete();
  QSPI_ClearFlag(QSPI_FLAG_TC);
  prv_wait_for_not_busy();
}
|
|
|
|
//! Uses the auto-polling mode to repeatedly issue an instruction which reads one byte, until
//! the bits selected by bit_mask are all set (or all clear).
//! @param instruction the instruction used to read the byte being polled
//! @param bit_mask the bits of the byte to look at
//! @param should_be_set true to wait for the masked bits to be set, false to wait for them to
//!                      be clear
//! @param timeout_us number of ~1us polling loops to wait for, or QSPI_NO_TIMEOUT to wait
//!                   forever
//! @return true if the bits reached the desired state, false on timeout
bool qspi_poll_bit(QSPIPort *dev, uint8_t instruction, uint8_t bit_mask, bool should_be_set,
                   uint32_t timeout_us) {
  prv_set_num_data_bytes(1);

  // Set autopolling on the register
  QSPI_AutoPollingMode_SetInterval(dev->auto_polling_interval);
  QSPI_AutoPollingMode_Config(should_be_set ? bit_mask : 0, bit_mask, QSPI_PMM_AND);
  QSPI_AutoPollingModeStopCmd(ENABLE);

  prv_set_ddr_enabled(false);

  // Prepare transaction
  uint32_t modes_bitset = 0;
  modes_bitset |= QSPI_ComConfig_FMode_Auto_Polling;
  modes_bitset |= QSPI_ComConfig_DMode_4Line;
  modes_bitset |= QSPI_ComConfig_IMode_4Line;
  modes_bitset |= instruction;
  prv_set_comm_config(modes_bitset, 0);

  bool matched = true;
  uint32_t loops = 0;
  while (QSPI_GetFlagStatus(QSPI_FLAG_SM) == RESET) {
    if ((timeout_us != QSPI_NO_TIMEOUT) && (++loops > timeout_us)) {
      PBL_LOG(LOG_LEVEL_ERROR, "Timeout waiting for a bit!?!?");
      matched = false;
      break;
    }
    delay_us(1);
  }

  // Stop polling mode. This is done on timeout as well: otherwise the peripheral would be left
  // polling (and busy), and the next transaction would trip the not-busy assert in
  // prv_set_ddr_enabled().
  QSPI_AbortRequest();
  prv_wait_for_not_busy();

  return matched;
}
|
|
|
|
//! Puts the peripheral into memory-mapped mode so that flash can be read directly through
//! QSPI_MMAP_BASE_ADDRESS. Use qspi_mmap_stop() to leave memory-mapped mode again.
//! @param instruction the read instruction to use for memory-mapped accesses
//! @param addr flash offset of the region which is going to be read
//! @param dummy_cycles number of dummy cycles between the address and data phases
//! @param length size in bytes of the region which is going to be read
//! @param is_ddr whether reads use double data rate
void qspi_mmap_start(QSPIPort *dev, uint8_t instruction, uint32_t addr, uint8_t dummy_cycles,
                     uint32_t length, bool is_ddr) {
  // Invalidate the data cache for the region which is about to be read
  dcache_invalidate((void *)(uintptr_t)(QSPI_MMAP_BASE_ADDRESS + addr), length);

  prv_set_ddr_enabled(is_ddr);

  uint32_t ccr_modes = instruction |
                       QSPI_ComConfig_FMode_Memory_Mapped |
                       QSPI_ComConfig_DMode_4Line |
                       QSPI_ComConfig_IMode_4Line |
                       QSPI_ComConfig_ADMode_4Line |
                       QSPI_ComConfig_ADSize_24bit;
  if (is_ddr) {
    ccr_modes |= QSPI_ComConfig_DDRMode_Enable | QSPI_ComConfig_DHHC_Enable;
  } else {
    ccr_modes |= QSPI_ComConfig_DDRMode_Disable | QSPI_ComConfig_DHHC_Disable;
  }
  prv_set_comm_config(ccr_modes, dummy_cycles);

  // The QSPI will prefetch bytes as long as nCS is low. This causes the flash part to draw a lot
  // more power (10mA vs 10uA in the case of Silk). Set the timeout such that the prefetch will
  // stop after 10 clock cycles of inactivity.
  QSPI_MemoryMappedMode_SetTimeout(10);

  // HACK ALERT: It seems like the MCU may send the wrong address for the first MMAP after certain
  // flash operations (we have seen it with an indirect read). To work around this, kick off one
  // read sufficiently far away from the area we want to read. This seems to reset the QSPI
  // controller back into a good state. This workaround is a little wasteful as it kicks off a 32
  // byte flash read but at 50MHz that should only take ~1.5us:
  //  ((1byte + 3byteaddr + 32bytes data) * 2 clocks/byte + 4 dummy_clocks) / 50Mhz = 1.52us
  const int workaround_offset = (addr > 128) ? 0 : 256;
  volatile uint8_t *workaround_ptr = (uint8_t *)(QSPI_MMAP_BASE_ADDRESS + workaround_offset);
  dcache_invalidate((void *)workaround_ptr, 1);
  // volatile access: the read must actually be performed even though the value is discarded
  (void)*workaround_ptr;
}
|
|
|
|
//! Leaves memory-mapped mode by aborting the ongoing request, and blocks until the peripheral
//! is no longer busy.
void qspi_mmap_stop(QSPIPort *dev) {
  QSPI_AbortRequest();
  // Don't return until the abort has taken effect
  prv_wait_for_not_busy();
}
|