mirror of
				https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
				synced 2025-10-31 03:13:59 +00:00 
			
		
		
		
	 0136611c62
			
		
	
	
		0136611c62
		
	
	
	
	
		
			
			Based on patch from David Rientjes <rientjes@google.com>, but changed by AK. Optimizes the 64-bit hamming weight for x86_64 processors assuming they have fast multiplication. Uses five fewer bitops than the generic hweight64. Benchmark on one EMT64 showed ~25% speedup with 2^24 consecutive calls. Define a new ARCH_HAS_FAST_MULTIPLIER that can be set by other architectures that can also multiply fast. Signed-off-by: Andi Kleen <ak@suse.de>
		
			
				
	
	
		
			60 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #include <linux/module.h>
 | |
| #include <asm/types.h>
 | |
| #include <asm/bitops.h>
 | |
| 
 | |
/**
 * hweightN - returns the hamming weight of an N-bit word
 * @w: the word to weigh
 *
 * The Hamming Weight of a number is the total number of bits set in it.
 */
 | |
| 
 | |
/**
 * hweight32 - count the bits set in a 32-bit word
 * @w: the word to weigh
 *
 * Branch-free parallel population count: neighbouring bit fields are
 * summed in place, doubling the field width at every step, until the
 * total sits in the low byte.
 *
 * Return: the number of set bits among the low 32 bits of @w (0..32).
 */
unsigned int hweight32(unsigned int w)
{
	/* Every 2-bit field becomes the count of bits set in that pair. */
	unsigned int res = w - ((w >> 1) & 0x55555555);

	/* Add adjacent 2-bit counts into 4-bit fields (each at most 4). */
	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
	/* Add adjacent nibbles into bytes (each at most 8, so no carry out). */
	res = (res + (res >> 4)) & 0x0F0F0F0F;
	/* Fold the four byte counts together; the sum lands in the low byte. */
	res = res + (res >> 8);
	/* The higher bytes now hold partial sums only, so mask them away. */
	return (res + (res >> 16)) & 0x000000FF;
}
 | |
| EXPORT_SYMBOL(hweight32);
 | |
| 
 | |
/**
 * hweight16 - count the bits set in a 16-bit word
 * @w: the word to weigh; only bits 0..15 contribute to the result
 *
 * Same parallel reduction as the 32-bit variant, with 16-bit masks.
 * Bits of @w above bit 15 survive the first step but are discarded by
 * the 0x3333 masks in the second, so callers need not pre-mask @w.
 *
 * Return: the number of set bits among the low 16 bits of @w (0..16).
 */
unsigned int hweight16(unsigned int w)
{
	/* Every 2-bit field becomes the count of bits set in that pair. */
	unsigned int res = w - ((w >> 1) & 0x5555);

	/* Add adjacent 2-bit counts into nibbles; drops anything above bit 15. */
	res = (res & 0x3333) + ((res >> 2) & 0x3333);
	/* Add adjacent nibbles into the two byte counts. */
	res = (res + (res >> 4)) & 0x0F0F;
	/* Add the two byte counts and keep just the low byte. */
	return (res + (res >> 8)) & 0x00FF;
}
 | |
| EXPORT_SYMBOL(hweight16);
 | |
| 
 | |
/**
 * hweight8 - count the bits set in an 8-bit word
 * @w: the word to weigh; only bits 0..7 contribute to the result
 *
 * Same parallel reduction as the wider variants, with 8-bit masks.
 * Bits of @w above bit 7 are discarded by the 0x33 masks in the second
 * step, so callers need not pre-mask @w.
 *
 * Return: the number of set bits among the low 8 bits of @w (0..8).
 */
unsigned int hweight8(unsigned int w)
{
	/* Every 2-bit field becomes the count of bits set in that pair. */
	unsigned int res = w - ((w >> 1) & 0x55);

	/* Add adjacent 2-bit counts into nibbles; drops anything above bit 7. */
	res = (res & 0x33) + ((res >> 2) & 0x33);
	/* Add the two nibble counts; the total (at most 8) fits in four bits. */
	return (res + (res >> 4)) & 0x0F;
}
 | |
| EXPORT_SYMBOL(hweight8);
 | |
| 
 | |
| unsigned long hweight64(__u64 w)
 | |
| {
 | |
| #if BITS_PER_LONG == 32
 | |
| 	return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
 | |
| #elif BITS_PER_LONG == 64
 | |
| #ifdef ARCH_HAS_FAST_MULTIPLIER
 | |
| 	w -= (w >> 1) & 0x5555555555555555ul;
 | |
| 	w =  (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
 | |
| 	w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
 | |
| 	return (w * 0x0101010101010101ul) >> 56;
 | |
| #else
 | |
| 	__u64 res = w - ((w >> 1) & 0x5555555555555555ul);
 | |
| 	res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
 | |
| 	res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
 | |
| 	res = res + (res >> 8);
 | |
| 	res = res + (res >> 16);
 | |
| 	return (res + (res >> 32)) & 0x00000000000000FFul;
 | |
| #endif
 | |
| #endif
 | |
| }
 | |
| EXPORT_SYMBOL(hweight64);
 |