Skip to content

Q: slightly more compact memmove_P #23

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
mcspr opened this issue Sep 9, 2022 · 0 comments
Open

Q: slightly more compact memmove_P #23

mcspr opened this issue Sep 9, 2022 · 0 comments

Comments

@mcspr
Copy link

mcspr commented Sep 9, 2022

Looking at the current memmove_P implementation

/*
 * Copy n bytes from src to dest, picking the copy routine by address.
 *
 * memcpy_P is used only when src is at or above 0x40000000 and dest is
 * below it; every other combination is handed to memmove. The return
 * value is whatever the chosen routine returns.
 */
void *memmove_P(void *dest, const void *src, size_t n)
{
    const char *const boundary = (const char *)0x40000000;
    const char *const from = (const char *)src;
    const char *const to = (const char *)dest;

    /* Source below the boundary, or destination at/above it: plain memmove. */
    if (from < boundary || to >= boundary) {
        return memmove(dest, src, n);
    }

    return memcpy_P(dest, src, n);
}

Since the comparison is against a constant with only a single bit set (0x40000000, i.e. bit 30), I wondered whether testing just that bit would change the generated code. This only works if we discard the idea that the function is ever going to be used on any 'higher' addresses (0x80000000 and above).
I am not sure how to benchmark it, though, so I cannot tell whether this does anything useful at all
(besides making the function 5 bytes smaller :)

// > cat memmove.c
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <sys/pgmspace.h>

/*
 * Report whether bit 30 (value 0x40000000) is set in the address `ptr`.
 *
 * This stands in for the usual `address >= 0x40000000` range check while
 * needing only a single-bit test. The two checks disagree for addresses
 * in 0x80000000..0xBFFFFFFF (bit 30 is clear there), so callers must not
 * rely on this helper for pointers in that range.
 */
static inline bool inFlash(const void *ptr) {
    enum { FLASH_ADDR_BIT = 30 };
    const uintptr_t addr = (uintptr_t)ptr;

    return ((addr >> FLASH_ADDR_BIT) & 1u) != 0;
}

/*
 * Bit-test variant of memmove_P.
 *
 * Calls memcpy_P when inFlash(src) is true and inFlash(dest) is false;
 * any other combination falls back to memmove. Returns the result of
 * whichever routine was called.
 */
void *memmove_P2(void *dest, const void *src, size_t n) {
    /* Anything that is not a flash -> non-flash copy goes to memmove. */
    if (!inFlash(src) || inFlash(dest)) {
        return memmove(dest, src, n);
    }

    return memcpy_P(dest, src, n);
}

/*
 * Baseline variant using full address comparisons against 0x40000000.
 *
 * Calls memcpy_P when src is at or above the limit and dest is below it;
 * otherwise calls memmove. Returns the result of the routine called.
 */
void *memmove_P1(void *dest, const void *src, size_t n)
{
    const char *const limit = (const char *)0x40000000;
    const bool src_at_or_above = (const char *)src >= limit;
    const bool dest_below = (const char *)dest < limit;

    if (src_at_or_above && dest_below) {
        return memcpy_P(dest, src, n);
    }

    return memmove(dest, src, n);
}
> xtensa-lx106-elf-gcc -c -Os memmove.c
> xtensa-lx106-elf-nm --radix=d -S memmove.o | grep memmove
         U memmove
00000020 00000023 T memmove_P1
00000000 00000018 T memmove_P2
> xtensa-lx106-elf-gcc -S -Os memmove.c
    .file   "memmove.c"
    .text
    .literal_position
    .align  4
    .global memmove_P2
    .type   memmove_P2, @function
; memmove_P2: compiled form of the bit-test variant.
; Judging by the C source, a3 carries `src` (tested first) and a2 carries
; `dest`; this register mapping is inferred - confirm against the ABI.
memmove_P2:
    bbci    a3, 30, .L2    ; bit 30 of a3 clear -> go to the memmove path
    bbsi    a2, 30, .L2    ; bit 30 of a2 set -> go to the memmove path
    j.l memcpy_P, a9    ; otherwise jump (not call) to memcpy_P
.L2:
    j.l memmove, a9    ; fallback: jump to memmove
    .size   memmove_P2, .-memmove_P2
    .literal_position
    .align  4
    .global memmove_P1
    .type   memmove_P1, @function
; memmove_P1: compiled form of the full-comparison variant.
; The limit is materialised as 0x3FFFFFFF (all ones shifted right by 2) and
; compared unsigned against a3 and a2; judging by the C source these hold
; `src` and `dest` respectively - inferred, confirm against the ABI.
memmove_P1:
    movi.n  a5, -1        ; a5 = 0xFFFFFFFF; this sequence only appears at -Os, -O2 and -O3 load 0x40000000 via l32r instead
    srli    a5, a5, 2    ; a5 = 0x3FFFFFFF
    bgeu    a5, a3, .L7    ; a3 <= 0x3FFFFFFF (below 0x40000000) -> memmove path
    bltu    a5, a2, .L7    ; a2 > 0x3FFFFFFF (at/above 0x40000000) -> memmove path
    j.l memcpy_P, a9    ; otherwise jump (not call) to memcpy_P
.L7:
    j.l memmove, a9    ; fallback: jump to memmove
    .size   memmove_P1, .-memmove_P1
    .ident  "GCC: (GNU) 10.3.0"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant