glibc系列之strlen()函数学习。
首先奉上的是strlen的源代码:
/* Return the length of the NUL-terminated string STR, not counting the
   terminator.  The return type is size_t and the argument is a pointer
   to a constant char array.

   After an initial byte-wise alignment phase the string is scanned one
   `unsigned long int` at a time.  Note that the word-wise phase reads the
   whole aligned word that contains the terminator, i.e. it may read a few
   bytes past the end of the string (but never past the aligned word).  */
size_t
STRLEN (const char *str)
{
  const char *char_ptr;
  const unsigned long int *longword_ptr;
  unsigned long int longword, himagic, lomagic;

  /* Handle the first few characters by reading one character at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.
     (This loop is what performs the byte alignment.)  */
  for (char_ptr = str;
       ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
       ++char_ptr)
    if (*char_ptr == '\0')
      return char_ptr - str;

  /* CHAR_PTR is now longword-aligned; continue the scan from here one
     longword at a time.  */
  longword_ptr = (const unsigned long int *) char_ptr;

  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
     the "holes."  Note that there is a hole just to the left of
     each byte, with an extra at the end:

     bits:  01111110 11111110 11111110 11111111
     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD

     The 1-bits make sure that carries propagate to the next 0-bit.
     The 0-bits provide holes for carries to fall into.

     NOTE: the bit pattern pictured above (0x7efefeff) is not one of the
     constants used below; this code tests with HIMAGIC (the top bit of
     every byte) and LOMAGIC (the bottom bit of every byte) instead.
     These two magic numbers are the heart of the algorithm.  */
  himagic = 0x80808080L;
  lomagic = 0x01010101L;
  if (sizeof (longword) > 4)
    {
      /* 64-bit version of the magic.  */
      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
      himagic = ((himagic << 16) << 16) | himagic;
      lomagic = ((lomagic << 16) << 16) | lomagic;
    }
  if (sizeof (longword) > 8)
    abort ();

  /* Instead of the traditional loop which tests each character,
     we will test a longword at a time.  The tricky part is testing
     if *any of the four* bytes in the longword in question are zero.  */
  for (;;)
    {
      longword = *longword_ptr++;

      /* Mainly checks whether any of these consecutive bytes is 0x00;
         if one is, the expression is non-zero and the `if` body runs.  */
      if (((longword - lomagic) & ~longword & himagic) != 0)
        {
          /* Which of the bytes was the zero?  If none of them were, it was
             a misfire; continue the search.  */
          const char *cp = (const char *) (longword_ptr - 1);

          if (cp[0] == 0)
            return cp - str;
          if (cp[1] == 0)
            return cp - str + 1;
          if (cp[2] == 0)
            return cp - str + 2;
          if (cp[3] == 0)
            return cp - str + 3;
          if (sizeof (longword) > 4)
            {
              if (cp[4] == 0)
                return cp - str + 4;
              if (cp[5] == 0)
                return cp - str + 5;
              if (cp[6] == 0)
                return cp - str + 6;
              if (cp[7] == 0)
                return cp - str + 7;
            }
        }
    }
}
下面是代码解析
首先给出两个例子来形象地说明这个问题。两个魔数及其二进制形式如下:
himagic = 0x80808080L; 即 1000-0000 1000-0000 1000-0000 1000-0000
lomagic = 0x01010101L; 即 0000-0001 0000-0001 0000-0001 0000-0001
判断条件是 ((longword - lomagic) & ~longword & himagic) != 0
例1:
longword = 000000ff
a = (longword - lomagic) = fefefffe
b = ~longword = ffffff00
a&b = fefeff00
himagic = 80808080
a&b&himagic = 80808000 (结果不为0,说明longword中含有值为0的字节)
例2:
longword = 00000000
a = (longword - lomagic) = fefefeff
b = ~longword = ffffffff
a&b = fefefeff
himagic = 80808080
a&b&himagic = 80808080 (结果不为0,说明longword中含有值为0的字节)
其实最主要的是if (((longword - lomagic) & ~longword & himagic) != 0) 怎么理解的。
首先考虑到ASCII码的最高位是0。~longword & himagic 的运算结果是:凡是最高位为0的字节,对应位置得到0x80;如果所有字节的最高位都是0,则运算结果是0x80808080。longword - lomagic 的运算结果是:一旦longword有一个字节为0,该字节减1时就要向高位借位,变成0xff(如果更低的字节也发生了借位,则是0xfe),最高位变为1。这样三者相与的结果就不为0,也就是找到了'\0',进入了if体里。
下面是我自己写的strlen()函数。
#define ulint unsigned long

/*
 * Return the length of the NUL-terminated string `str`, not counting the
 * terminator.
 *
 * The string is first walked byte by byte until the pointer is aligned to
 * sizeof(ulint); after that it is scanned one ulint at a time using the
 * ((w - lomagic) & ~w & himagic) zero-byte test.  The word-wise phase reads
 * the whole aligned word that contains the terminator, so it may read a few
 * bytes past the end of the string (never past that aligned word).
 *
 * The result is truncated to unsigned int, as dictated by the return type.
 */
unsigned int
mglStrLen(const char *str)
{
    const char *tempStr;

    /* Byte-wise phase.  The mask must be parenthesized: `!=` binds tighter
     * than `&`, so without the parentheses only bit 0 of the address would
     * be tested and the pointer could be left misaligned. */
    for (tempStr = str;
         ((ulint)tempStr & (sizeof(ulint) - 1)) != 0;
         tempStr++)
    {
        if (*tempStr == '\0')
            return (unsigned int)(tempStr - str);
    }

    /* himagic has the top bit of every byte set, lomagic the bottom bit. */
    ulint himagic = 0x80808080UL;
    ulint lomagic = 0x01010101UL;
    if (sizeof(ulint) > 4)
    {
        /* Widen the magic numbers for an 8-byte ulint.  The shift is done in
         * two steps so it stays well defined when ulint is only 32 bits. */
        himagic = ((himagic << 16) << 16) | himagic;
        lomagic = ((lomagic << 16) << 16) | lomagic;
    }

    const ulint *longwordPtr = (const ulint *)tempStr;
    while (1)
    {
        /* Test the current word BEFORE advancing, so the first aligned word
         * is not skipped. */
        ulint longword = *longwordPtr;
        if (((longword - lomagic) & ~longword & himagic) != 0)
        {
            /* Some byte of this word is zero; locate the first one.  Every
             * byte of the word is checked, not just the first four. */
            const char *cp = (const char *)longwordPtr;
            unsigned int i;
            for (i = 0; i < sizeof(ulint); i++)
            {
                if (cp[i] == '\0')
                    return (unsigned int)(cp - str + i);
            }
        }
        longwordPtr++;
    }
}
void main()
{
char str[] =
"mglhahaahhahah"; ---->
14个字节
int len = mglStrLen(
str);
cout <<
"mgl " <<len<< endl;
}
程序运行结果:
mgl 14
请按任意键继续. . .
一个属于mgl的完美unsigned int strlen(const char * str){}函数。
哈哈!!