GNU gperf is a perfect hash function generator. For a given list of strings,it produces a hash function and hash table, in form of C or C++ code, forlooking up a value depending on the input string. The hash function isperfect, which means that the hash table has no collisions, and the hash table lookup needs a single string comparison only.
GNU gperf是完美哈希函数生成器。对于一个给定的字符串集,它会生成哈希函数和哈希表。以c或cpp代码的形式,通过输入的字符串来实现查询操作。产生的哈希函数是完美的,意味着哈希表没有冲突,并且查询操作仅仅需要一次比较操作。
下载地址:点击此处
下载后直接双击安装
设置环境变量 把安装的bin文件目录设置到path中
gperf可以根据输入文件中的字符串集合生成hash函数和哈希表(生成完美哈希),一般根据字符串计算出无冲突hash值,然后进行字符串比较
1.配置输入文件
输入文件格式实例c.gperf:
if do int for case char auto goto else long void enum float short union break while const double static extern struct return sizeof switch signed typedef default unsigned continue register volatilejscript.h:(需要的头文件)
enum { TK_ABSTRACT = 255, TK_BOOLEAN, TK_BREAK, TK_BYTE, TK_CASE, TK_CATCH, TK_CHAR, TK_CLASS, TK_CONST, TK_CONTINUE, TK_DEFAULT, TK_DO, TK_DOUBLE, TK_ELSE, TK_EXTENDS, TK_FALSE, TK_FINAL, TK_FINALLY, TK_FLOAT, TK_FOR, TK_FUNCTION, TK_GOTO, TK_IF, TK_IMPLEMENTS, TK_IMPORT, TK_IN, TK_INSTANCEOF, TK_INT, TK_INTERFACE, TK_LONG, TK_NATIVE, TK_NEW, TK_NULL, TK_PACKAGE, TK_PRIVATE, TK_PROTECTED, TK_PUBLIC, TK_RETURN, TK_SHORT, TK_STATIC, TK_SUPER, TK_SWITCH, TK_SYNCHRONIZED, TK_THIS, TK_THROW, TK_THROWS, TK_TRANSIENT, TK_TRUE, TK_TRY, TK_VAR, TK_VOID, TK_WHILE, TK_WITH, };
jscript.gperf文件实例:
%{ /* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */ /* -g -o -j1 -t -p */ #include <stdio.h> #include "jscript.h" %} struct js_keyword { char * name; int token; }; %% # Javascript reserved words, see "keywords.html" abstract, TK_ABSTRACT boolean, TK_BOOLEAN break, TK_BREAK byte, TK_BYTE case, TK_CASE catch, TK_CATCH char, TK_CHAR class, TK_CLASS const, TK_CONST continue, TK_CONTINUE default, TK_DEFAULT do, TK_DO double, TK_DOUBLE else, TK_ELSE extends, TK_EXTENDS false, TK_FALSE final, TK_FINAL finally, TK_FINALLY float, TK_FLOAT for, TK_FOR function, TK_FUNCTION goto, TK_GOTO if, TK_IF implements, TK_IMPLEMENTS import, TK_IMPORT in, TK_IN instanceof, TK_INSTANCEOF int, TK_INT interface, TK_INTERFACE long, TK_LONG native, TK_NATIVE new, TK_NEW null, TK_NULL package, TK_PACKAGE private, TK_PRIVATE protected, TK_PROTECTED public, TK_PUBLIC return, TK_RETURN short, TK_SHORT static, TK_STATIC super, TK_SUPER switch, TK_SWITCH synchronized, TK_SYNCHRONIZED this, TK_THIS throw, TK_THROW throws, TK_THROWS transient, TK_TRANSIENT true, TK_TRUE try, TK_TRY var, TK_VAR void, TK_VOID while, TK_WHILE with, TK_WITH %% int main(void) //要写的程序也可以写到文件中,从而后来生成真正的程序 { char *js_word[] = {"protected", "throws", "with", 0}; char *no_js_word[] = {"protectef", "throwd", "witp", 0}; char ** cp; cp = &js_word[0]; while (*cp != NULL) { struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp)); printf("%s is %s javascript expression!\n", *cp,p? "in":"not in"); printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in"); printf("%s->%d\n",p->name,p->token); cp++; } cp = &no_js_word[0]; while (*cp != NULL) { printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in"); cp++; } return 0; } 2.用配置文件生成程序源文件(注意,gperf哈希函数和哈希表都是硬编码到此生成文件中) D:\study\gperf\example>gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf > jscript.c 切换到当前目录,通过设置的jscript.gperf文件生成 jscript.c源文件 /* C code produced by gperf version 3.0.1 */ /* Command-line: gperf -k'1,2,$' -t -K name -H js_kw_hash -N js_kw_lookup jscript.gperf */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) /* The character set is not based on ISO-646. */ error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." #endif #line 1 "jscript.gperf" /* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */ /* -g -o -j1 -t -p */ #include <stdio.h> #include "jscript.h" #line 7 "jscript.gperf" struct js_keyword { char * name; int token; }; #define TOTAL_KEYWORDS 53 //统计记录总数 #define MIN_WORD_LENGTH 2 //字符串最小长度 #define MAX_WORD_LENGTH 12 //字符串最大长度 #define MIN_HASH_VALUE 2 //最小哈希值,对应于wordlist的索引值 #define MAX_HASH_VALUE 79 //最大哈希值,对应于wordlist的索引值 /* maximum key range = 78, duplicates = 0 */ #ifdef __GNUC__ __inline #else #ifdef __cplusplus inline #endif #endif static unsigned int js_kw_hash (str, len) register const char *str; register unsigned int len; { static unsigned char asso_values[] = /*定义包含255个字符的数组,用MAX_HASH_VALUE+1初始化*/ { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 25, 35, 25, 20, 0, 10, 50, 0, 0, 80, 15, 15, 45, 0, 10, 5, 80, 15, 5, 5, 40, 5, 0, 0, 30, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 }; return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]]; //最重要的hash函数,计算wordlist索引 } #ifdef __GNUC__ __inline #endif struct js_keyword * js_kw_lookup (str, len) register const char *str; register unsigned int len; { static struct js_keyword wordlist[] = //定义hash表 { {""}, {""}, #line 39 "jscript.gperf" {"in", TK_IN}, #line 45 "jscript.gperf" {"new", TK_NEW}, #line 66 "jscript.gperf" {"with", TK_WITH}, #line 65 "jscript.gperf" {"while", TK_WHILE}, {""}, {""}, #line 41 "jscript.gperf" {"int", TK_INT}, #line 42 "jscript.gperf" {"interface", TK_INTERFACE}, #line 58 "jscript.gperf" {"throw", TK_THROW}, #line 55 "jscript.gperf" {"switch", TK_SWITCH}, #line 28 "jscript.gperf" {"extends", TK_EXTENDS}, {""}, #line 57 "jscript.gperf" {"this", TK_THIS}, #line 52 "jscript.gperf" {"short", TK_SHORT}, #line 59 "jscript.gperf" {"throws", TK_THROWS}, {""}, {""}, #line 27 "jscript.gperf" {"else", TK_ELSE}, #line 40 "jscript.gperf" {"instanceof", TK_INSTANCEOF}, #line 51 "jscript.gperf" {"return", TK_RETURN}, #line 36 "jscript.gperf" {"if", TK_IF}, {""}, #line 61 "jscript.gperf" {"true", TK_TRUE}, {""}, {""}, #line 48 "jscript.gperf" {"private", TK_PRIVATE}, {""}, {""}, #line 30 "jscript.gperf" {"final", TK_FINAL}, #line 44 "jscript.gperf" {"native", TK_NATIVE}, #line 24 "jscript.gperf" {"default", TK_DEFAULT}, {""}, #line 60 "jscript.gperf" {"transient", TK_TRANSIENT}, #line 32 "jscript.gperf" {"float", TK_FLOAT}, #line 26 "jscript.gperf" {"double", TK_DOUBLE}, #line 47 "jscript.gperf" {"package", TK_PACKAGE}, #line 33 "jscript.gperf" {"for", TK_FOR}, #line 64 "jscript.gperf" {"void", TK_VOID}, #line 29 "jscript.gperf" {"false", TK_FALSE}, #line 53 "jscript.gperf" {"static", TK_STATIC}, #line 25 "jscript.gperf" {"do", TK_DO}, #line 23 "jscript.gperf" {"continue", TK_CONTINUE}, #line 20 "jscript.gperf" {"char", TK_CHAR}, #line 22 "jscript.gperf" {"const", TK_CONST}, {""}, #line 31 "jscript.gperf" {"finally", TK_FINALLY}, #line 63 "jscript.gperf" {"var", TK_VAR}, #line 49 "jscript.gperf" {"protected", TK_PROTECTED}, #line 21 "jscript.gperf" {"class", TK_CLASS}, {""}, #line 15 "jscript.gperf" {"boolean", TK_BOOLEAN}, #line 62 "jscript.gperf" {"try", TK_TRY}, #line 18 "jscript.gperf" {"case", TK_CASE}, #line 19 "jscript.gperf" {"catch", TK_CATCH}, #line 38 "jscript.gperf" {"import", TK_IMPORT}, {""}, #line 34 "jscript.gperf" {"function", TK_FUNCTION}, #line 46 "jscript.gperf" {"null", TK_NULL}, #line 37 "jscript.gperf" {"implements", TK_IMPLEMENTS}, {""}, {""}, {""}, {""}, #line 54 "jscript.gperf" {"super", TK_SUPER}, {""}, #line 56 "jscript.gperf" {"synchronized", TK_SYNCHRONIZED}, {""}, #line 17 "jscript.gperf" {"byte", TK_BYTE}, #line 16 "jscript.gperf" {"break", TK_BREAK}, {""}, {""}, #line 14 "jscript.gperf" {"abstract", TK_ABSTRACT}, #line 35 "jscript.gperf" {"goto", TK_GOTO}, {""}, #line 50 "jscript.gperf" {"public", TK_PUBLIC}, {""}, {""}, #line 43 "jscript.gperf" {"long", TK_LONG} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = js_kw_hash (str, len); if (key <= MAX_HASH_VALUE && key >= 0) { register const char *s = wordlist[key].name; if (*str == *s && !strcmp (str + 1, s + 1)) return &wordlist[key]; } } return 0; } #line 67 "jscript.gperf" int main(void) { char *js_word[] = {"protected", "throws", "with", 0}; char *no_js_word[] = {"protectef", "throwd", "witp", 0}; char ** cp; cp = &js_word[0]; while (*cp != NULL) { struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp)); printf("%s is %s javascript expression!\n", *cp,p? "in":"not in"); printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in"); printf("%s->%d\n",p->name,p->token); cp++; } cp = &no_js_word[0]; while (*cp != NULL) { printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in"); cp++; } return 0; } 我们主要关注两个函数:static unsigned int js_kw_hash (str, len):根据传入的字符串生成哈希值
struct js_keyword * js_kw_lookup (str, len):根据传入的字符串进行查找操作
编译
运行:
详解参见:https://www.ibm.com/developerworks/cn/linux/l-gperf.html