I am trying to find an efficient way to do the following in x86_64 assembly:
if(N < word_size) {
dst[N] = 1; // as in Nth bit of dst = 1
}
else {
dst[word_size - 1:0] = 0
}
Alternatively, I could get the desired result if the "else" case did not unset the other bits, or if the "if" case did unset the other bits. The important thing is that if N >= word_size it will not set any bits.
I am unable to find any instruction that might do this, as bt[s/c], shlx, sal, rol, and shld all appear to take the shift count modulo the operand width.
The use case is basically I will be iterating over a bit vector with a known length and want to either A) find the first set bit and return its position, or B) test all of the bits and if no set bit is found return length of the vector.
// rsi has length (in bits); rdi points at the bit vector (scanned one qword at a time)
L(keep_searching):
	movq (%rdi), %rax         // load next 64-bit chunk (fixed: `%(rdi)` is invalid AT&T syntax)
	testq %rax, %rax
	jnz L(found)              // some bit set in this word -> resolve its position at L(found)
	subq $64, %rsi            // consumed 64 bits (fixed: register operand needs the % prefix)
	jbe L(done)               // this done will return original value of rsi
	addq $8, %rdi             // advance to the next qword
	jmp L(keep_searching)
I figure this could be vastly sped up if I could quickly set a bit in rax when rsi < 64, so I could drop the second branch. But for this to work it needs to have the behavior above, i.e. it can't set bit rsi % 64; it needs to set the bit iff rsi < 64.
Does anyone know of an instruction that can do this? Every instruction I can think of to check uses modulo on src. Any help would be greatly appreciated.
Thanks!
A few versions that are working well for me for 32-bit. If I use MMX, as @PeterCordes pointed out, psllq is exactly what I want.
/* Returns 1 << cnt when cnt <= 32, and 0 otherwise.
 * Requires BMI2 (shlxq). The trick: instead of trying to suppress an
 * out-of-range shift (shlxq still masks the count mod 64), we shift a
 * value that is already 0 whenever cnt is out of range, so no stray
 * bit can ever be set.
 * NOTE(review): the boundary here is cnt <= 32 (`setbe`), so bit 32 is
 * still set for cnt == 32 — confirm against the stated `N < word_size`
 * spec whether `setb` was intended instead. */
uint64_t __attribute__((noinline, noclone)) shift(uint64_t cnt) {
uint64_t ret = 0;
asm volatile(
/* ret = (cnt <= 32) ? 1 : 0 */
"cmpq $32, %[cnt]\n\t"
"setbe %b[ret]\n\t"
/* ret <<= cnt; hardware masks the count mod 64, harmless since ret==0 then */
"shlxq %[cnt], %[ret], %[ret]\n\t"
: [ ret ] "+r"(ret)
: [ cnt ] "r"(cnt)
: "cc");
return ret;
}
/* Branchless variant: returns 1 << cnt when cnt <= 32, and 0 otherwise.
 * Builds an all-ones/all-zeros mask from the sign of (cnt - 33) and
 * ANDs it with an unconditional 1 << cnt.
 * Requires BMI2 (shlxq). */
uint64_t __attribute__((noinline, noclone)) shift2(uint64_t cnt) {
uint64_t ret = 0, tmp = 0;
asm volatile(
/* tmp = cnt - 33; negative exactly when cnt <= 32 */
"leaq -33(%[cnt]), %[tmp]\n\t"
"movl $1, %k[ret]\n\t"
/* ret = 1 << cnt (count masked mod 64 by hardware) */
"shlxq %[cnt], %[ret], %[ret]\n\t"
/* tmp = sign-extended: all-ones if cnt <= 32, else all-zeros */
"sarq $63, %[tmp]\n\t"
/* zero the result for out-of-range counts */
"andq %[tmp], %[ret]\n\t"
: [ ret ] "+r"(ret), [ tmp ] "+r"(tmp), [ cnt ] "+r"(cnt)
:
: "cc");
return ret;
}
/* bts-based variant: returns 1 << cnt when cnt <= 32, and 0 otherwise.
 * Same mask trick as shift2: sign of (cnt - 33) selects all-ones or
 * all-zeros, ANDed with the bts result.
 * Bug fix: ret and tmp were read uninitialized through "+r" constraints
 * (undefined behavior in C). btsq only sets ONE bit, so for cnt <= 32
 * every other garbage bit of the uninitialized register survived the
 * final andq (mask is all-ones there) and corrupted the result. Both
 * must start at 0. */
uint64_t __attribute__((noinline, noclone)) shift3(uint64_t cnt) {
    uint64_t ret = 0, tmp = 0;
    asm volatile(
        /* tmp = cnt - 33; negative exactly when cnt <= 32 */
        "leaq -33(%[cnt]), %[tmp]\n\t"
        /* set bit (cnt mod 64) in ret; ret starts at 0 so ret == 1 << cnt */
        "btsq %[cnt], %[ret]\n\t"
        /* tmp = all-ones if cnt <= 32, else all-zeros */
        "sarq $63, %[tmp]\n\t"
        /* zero the result for out-of-range counts */
        "andq %[tmp], %[ret]\n\t"
        : [ ret ] "+r"(ret), [ tmp ] "+r"(tmp), [ cnt ] "+r"(cnt)
        :
        : "cc");
    return ret;
}