原文链接:https://blog.csdn.net/hairetz/article/details/18264243
#include <sys/socket.h>
#include <sys/epoll.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <iostream>
#include <string>
#include <strings.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>

using namespace std;

/*
 * Plain byte-by-byte string comparison.
 *
 * Bytes are compared as unsigned char so that the ordering does not depend
 * on the signedness of plain char and agrees with strcmp_asm below.
 *
 * Returns 0 when the strings are equal, -1 when the first differing byte of
 * cs is smaller than the one in ct, and 1 otherwise.
 */
static inline int strcmp_normal(const char * cs,const char * ct)
{
    const unsigned char *a = reinterpret_cast<const unsigned char *>(cs);
    const unsigned char *b = reinterpret_cast<const unsigned char *>(ct);

    while (*a != '\0' && *a == *b) {
        ++a;
        ++b;
    }
    if (*a == *b) {
        return 0;
    }
    return *a < *b ? -1 : 1;
}

/*
 * String comparison written with the x86 string instructions lodsb/scasb.
 *
 * Returns 0 when the strings are equal, -1 when the first differing byte of
 * cs is smaller (unsigned) than the one in ct, and 1 otherwise.
 *
 * On targets other than x86/x86-64 the inline assembly cannot be assembled,
 * so the function falls back to strcmp_normal; timings are then not a
 * comparison of two different implementations.
 */
static inline int strcmp_asm(const char * cs,const char * ct)
{
#if defined(__i386__) || defined(__x86_64__)
    int res;

    /*
     * cs and ct are read-write operands pinned to (R|E)SI and (R|E)DI:
     * lodsb/scasb advance both registers, and using the pointers themselves
     * keeps the operand width correct on 32- and 64-bit targets.
     * The "memory" clobber tells the compiler the asm reads the strings,
     * "cc" that it changes the flags. The code relies on the direction flag
     * being clear, as the x86 calling conventions require.
     */
    __asm__ __volatile__(
        "1:\tlodsb\n\t"
        "scasb\n\t"
        "jne 2f\n\t"
        "testb %%al,%%al\n\t"
        "jne 1b\n\t"
        "xorl %%eax,%%eax\n\t"
        "jmp 3f\n"
        "2:\tsbbl %%eax,%%eax\n\t"
        "orb $1,%%al\n"
        "3:"
        : "=a" (res), "+S" (cs), "+D" (ct)
        :
        : "memory", "cc");
    return res;
#else
    return strcmp_normal(cs, ct);
#endif
}

/*
 * Fills s with iSamelen copies of '1' followed by the suffix sepc.
 *
 * A NULL sepc (the default) appends nothing; a non-positive iSamelen
 * produces no '1' characters. Always returns 0.
 */
static inline int generator_data(string &s,int iSamelen=1,const char *sepc=NULL)
{
    s.clear();
    if (iSamelen > 0) {
        s.assign(static_cast<string::size_type>(iSamelen), '1');
    }
    if (sepc != NULL) {
        s += sepc;
    }
    return 0;
}

/* Whole milliseconds elapsed between two gettimeofday() samples. */
static inline int elapsed_ms(const struct timeval &begin, const struct timeval &end)
{
    /* Sum in microseconds first so a negative tv_usec difference borrows
     * correctly from the seconds instead of being truncated on its own. */
    long long usec = static_cast<long long>(end.tv_sec - begin.tv_sec) * 1000000LL
                   + static_cast<long long>(end.tv_usec - begin.tv_usec);
    return static_cast<int>(usec / 1000);
}

/*
 * Compiler-only barrier: makes the optimizer assume the two buffers may have
 * changed, so a comparison inside a timing loop is redone on every iteration
 * instead of being hoisted out. Emits no instructions.
 */
static inline void benchmark_barrier(const char *a, const char *b)
{
    __asm__ __volatile__("" : : "r" (a), "r" (b) : "memory");
}

/*
 * Times `loop` calls of strcmp_asm and of strcmp_normal on two strings that
 * share a prefix of iSamelen '1' characters and differ only in their last
 * byte ("...s1" vs "...s2"), then prints both durations in milliseconds.
 */
static inline void strcmp_perfom(int loop=1,int iSamelen=1)
{
    /* Every result is stored here so neither loop can be optimized away. */
    volatile int sink = 0;
    struct timeval start_tv, end_tv;

    /* loop asm */
    string s1, s2;
    generator_data(s1, iSamelen, "s1");
    generator_data(s2, iSamelen, "s2");
    const char *a1 = s1.c_str();
    const char *a2 = s2.c_str();

    gettimeofday(&start_tv, NULL);
    for (int i = 0; i < loop; ++i) {
        benchmark_barrier(a1, a2);
        sink = strcmp_asm(a1, a2);
    }
    gettimeofday(&end_tv, NULL);
    int iMs = elapsed_ms(start_tv, end_tv);

    /* loop normal: uses its own pair of strings, like the asm run */
    string ss1, ss2;
    generator_data(ss1, iSamelen, "s1");
    generator_data(ss2, iSamelen, "s2");
    const char *n1 = ss1.c_str();
    const char *n2 = ss2.c_str();

    gettimeofday(&start_tv, NULL);
    for (int i = 0; i < loop; ++i) {
        benchmark_barrier(n1, n2);
        sink = strcmp_normal(n1, n2);
    }
    gettimeofday(&end_tv, NULL);
    int iMs2 = elapsed_ms(start_tv, end_tv);

    (void)sink;
    printf("loop:%d,strcmp_asm,time:%d ms,strcmp_normal,time:%d ms\n",loop,iMs,iMs2);
}

/*
 * Parses a non-negative decimal count that fits in int.
 * On malformed or out-of-range input a warning is written to stderr and
 * `fallback` is returned.
 */
static inline int parse_count_arg(const char *text, int fallback)
{
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value < 0 || value > INT_MAX) {
        fprintf(stderr, "invalid count '%s', using %d\n", text, fallback);
        return fallback;
    }
    return static_cast<int>(value);
}

/*
 * Usage: prog [loop [iSamelen]]
 *   loop     - number of comparisons per implementation (default 10000)
 *   iSamelen - length of the common prefix of the two strings (default 1000)
 */
int main(int argc,char** argv)
{
    int loop=10000;
    int iSamelen=1000;

    if(argc>1)
    {
        loop = parse_count_arg(argv[1], loop);
    }
    if(argc>2)
    {
        iSamelen = parse_count_arg(argv[2], iSamelen);
    }
    printf("input loop:%d,iSamelen:%d\n",loop,iSamelen);
    strcmp_perfom(loop,iSamelen);
    return 0;
}
测试结果:
结果如下:在匹配串足够长的情况下,strcmp_asm 比 strcmp_normal 快约 3~4 倍。
input loop:5000,iSamelen:100000
loop:5000,strcmp_asm,time:468 ms,strcmp_normal,time:1747 ms
input loop:50000,iSamelen:10000
loop:50000,strcmp_asm,time:501 ms,strcmp_normal,time:1749 ms
input loop:500000,iSamelen:1000
loop:500000,strcmp_asm,time:494 ms,strcmp_normal,time:1787 ms
input loop:5000000,iSamelen:100
loop:5000000,strcmp_asm,time:693 ms,strcmp_normal,time:1922 ms
input loop:50000000,iSamelen:10
loop:50000000,strcmp_asm,time:1337 ms,strcmp_normal,time:2424 ms
在匹配串极短(1、2、4 字节)的场景下,strcmp_asm 仍比 strcmp_normal 快约 1.5~2 倍。
input loop:50000000,iSamelen:4
loop:50000000,strcmp_asm,time:849 ms,strcmp_normal,time:1425 ms
input loop:50000000,iSamelen:2
loop:50000000,strcmp_asm,time:753 ms,strcmp_normal,time:1130 ms
input loop:50000000,iSamelen:1
loop:50000000,strcmp_asm,time:670 ms,strcmp_normal,time:1040 ms
文章的脚注信息由WordPress的wp-posturl插件自动生成