GNU工具-gperf详解(完美哈希函数生成器)

xiaoxiao2021-02-28  66

1.gperf是干什么的?

GNU gperf is a perfect hash function generator. For a given list of strings,it produces a hash function and hash table, in form of C or C++ code, forlooking up a value depending on the input string. The hash function isperfect, which means that the hash table has no collisions, and the hash table lookup needs a single string comparison only.

GNU gperf是完美哈希函数生成器。对于一个给定的字符串集,它会生成哈希函数和哈希表。以c或cpp代码的形式,通过输入的字符串来实现查询操作。产生的哈希函数是完美的,意味着哈希表没有冲突,并且查询操作仅仅需要一次比较操作。

2.下载安装(windows版)

下载地址:点击此处

下载后直接双击安装

设置环境变量 把安装的bin文件目录设置到path中

3.使用方法

  gperf可以根据输入文件中的字符串集合生成hash函数和哈希表(生成完美哈希),一般根据字符串计算出无冲突hash值,然后进行字符串比较

 1.配置输入文件

输入文件格式实例c.gperf

if do int for case char auto goto else long void enum float short union break while const double static extern struct return sizeof switch signed typedef default unsigned continue register volatile

jscript.h:(需要的头文件)

enum { TK_ABSTRACT = 255, TK_BOOLEAN, TK_BREAK, TK_BYTE, TK_CASE, TK_CATCH, TK_CHAR, TK_CLASS, TK_CONST, TK_CONTINUE, TK_DEFAULT, TK_DO, TK_DOUBLE, TK_ELSE, TK_EXTENDS, TK_FALSE, TK_FINAL, TK_FINALLY, TK_FLOAT, TK_FOR, TK_FUNCTION, TK_GOTO, TK_IF, TK_IMPLEMENTS, TK_IMPORT, TK_IN, TK_INSTANCEOF, TK_INT, TK_INTERFACE, TK_LONG, TK_NATIVE, TK_NEW, TK_NULL, TK_PACKAGE, TK_PRIVATE, TK_PROTECTED, TK_PUBLIC, TK_RETURN, TK_SHORT, TK_STATIC, TK_SUPER, TK_SWITCH, TK_SYNCHRONIZED, TK_THIS, TK_THROW, TK_THROWS, TK_TRANSIENT, TK_TRUE, TK_TRY, TK_VAR, TK_VOID, TK_WHILE, TK_WITH, };

jscript.gperf文件实例:

%{ /* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */ /* -g -o -j1 -t -p */ #include <stdio.h> #include "jscript.h" %} struct js_keyword { char * name; int token; }; %% # Javascript reserved words, see "keywords.html" abstract, TK_ABSTRACT boolean, TK_BOOLEAN break, TK_BREAK byte, TK_BYTE case, TK_CASE catch, TK_CATCH char, TK_CHAR class, TK_CLASS const, TK_CONST continue, TK_CONTINUE default, TK_DEFAULT do, TK_DO double, TK_DOUBLE else, TK_ELSE extends, TK_EXTENDS false, TK_FALSE final, TK_FINAL finally, TK_FINALLY float, TK_FLOAT for, TK_FOR function, TK_FUNCTION goto, TK_GOTO if, TK_IF implements, TK_IMPLEMENTS import, TK_IMPORT in, TK_IN instanceof, TK_INSTANCEOF int, TK_INT interface, TK_INTERFACE long, TK_LONG native, TK_NATIVE new, TK_NEW null, TK_NULL package, TK_PACKAGE private, TK_PRIVATE protected, TK_PROTECTED public, TK_PUBLIC return, TK_RETURN short, TK_SHORT static, TK_STATIC super, TK_SUPER switch, TK_SWITCH synchronized, TK_SYNCHRONIZED this, TK_THIS throw, TK_THROW throws, TK_THROWS transient, TK_TRANSIENT true, TK_TRUE try, TK_TRY var, TK_VAR void, TK_VOID while, TK_WHILE with, TK_WITH %% int main(void) //要写的程序也可以写到文件中,从而后来生成真正的程序 { char *js_word[] = {"protected", "throws", "with", 0}; char *no_js_word[] = {"protectef", "throwd", "witp", 0}; char ** cp; cp = &js_word[0];  while (*cp != NULL)     {         struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp));         printf("%s is %s javascript expression!\n", *cp,p? "in":"not in");         printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in");         printf("%s->%d\n",p->name,p->token);         cp++;     } cp = &no_js_word[0]; while (*cp != NULL) { printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in"); cp++; } return 0; } 2.用配置文件生成程序源文件(注意,gperf哈希函数和哈希表都是硬编码到此生成文件中)

D:\study\gperf\example>gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf  > jscript.c 切换到当前目录,通过设置的jscript.gperf文件生成 jscript.c源文件

/* C code produced by gperf version 3.0.1 */ /* Command-line: gperf -k'1,2,$' -t -K name -H js_kw_hash -N js_kw_lookup jscript.gperf */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) /* The character set is not based on ISO-646. */ error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." #endif #line 1 "jscript.gperf" /* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */ /* -g -o -j1 -t -p */ #include <stdio.h> #include "jscript.h" #line 7 "jscript.gperf" struct js_keyword { char * name; int token; }; #define TOTAL_KEYWORDS 53 //统计记录总数 #define MIN_WORD_LENGTH 2 //字符串最小长度 #define MAX_WORD_LENGTH 12 //字符串最大长度 #define MIN_HASH_VALUE 2 //最小哈希值,对应于wordlist的索引值 #define MAX_HASH_VALUE 79 //最大哈希值,对应于wordlist的索引值 /* maximum key range = 78, duplicates = 0 */ #ifdef __GNUC__ __inline #else #ifdef __cplusplus inline #endif #endif static unsigned int js_kw_hash (str, len) register const char *str; register unsigned int len; { static unsigned char asso_values[] = /*定义包含255个字符的数组,用MAX_HASH_VALUE+1初始化*/ { 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 25, 35, 25, 20, 0, 10, 50, 0, 0, 80, 15, 15, 45, 0, 10, 5, 80, 15, 5, 5, 40, 5, 0, 0, 30, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 }; return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]]; //最重要的hash函数,计算wordlist索引 } #ifdef __GNUC__ __inline #endif struct js_keyword * js_kw_lookup (str, len) register const char *str; register unsigned int len; { static struct js_keyword wordlist[] = //定义hash表 { {""}, {""}, #line 39 "jscript.gperf" {"in", TK_IN}, #line 45 "jscript.gperf" {"new", TK_NEW}, #line 66 "jscript.gperf" {"with", TK_WITH}, #line 65 "jscript.gperf" {"while", TK_WHILE}, {""}, {""}, #line 41 "jscript.gperf" {"int", TK_INT}, #line 42 "jscript.gperf" {"interface", TK_INTERFACE}, #line 58 "jscript.gperf" {"throw", TK_THROW}, #line 55 "jscript.gperf" {"switch", TK_SWITCH}, #line 28 "jscript.gperf" {"extends", TK_EXTENDS}, {""}, #line 57 "jscript.gperf" {"this", TK_THIS}, #line 52 "jscript.gperf" {"short", TK_SHORT}, #line 59 "jscript.gperf" {"throws", TK_THROWS}, {""}, {""}, #line 27 "jscript.gperf" {"else", TK_ELSE}, #line 40 "jscript.gperf" {"instanceof", TK_INSTANCEOF}, #line 51 "jscript.gperf" {"return", TK_RETURN}, #line 36 "jscript.gperf" {"if", TK_IF}, {""}, #line 61 "jscript.gperf" {"true", TK_TRUE}, {""}, {""}, #line 48 "jscript.gperf" {"private", TK_PRIVATE}, {""}, {""}, #line 30 "jscript.gperf" {"final", TK_FINAL}, #line 44 "jscript.gperf" {"native", TK_NATIVE}, #line 24 "jscript.gperf" {"default", TK_DEFAULT}, {""}, #line 60 "jscript.gperf" {"transient", TK_TRANSIENT}, #line 32 "jscript.gperf" {"float", TK_FLOAT}, #line 26 "jscript.gperf" {"double", TK_DOUBLE}, #line 47 "jscript.gperf" {"package", TK_PACKAGE}, #line 33 "jscript.gperf" {"for", TK_FOR}, #line 64 "jscript.gperf" {"void", TK_VOID}, #line 29 "jscript.gperf" {"false", TK_FALSE}, #line 53 "jscript.gperf" {"static", TK_STATIC}, #line 25 "jscript.gperf" {"do", TK_DO}, #line 23 "jscript.gperf" {"continue", TK_CONTINUE}, #line 20 "jscript.gperf" {"char", TK_CHAR}, #line 22 "jscript.gperf" {"const", TK_CONST}, {""}, #line 31 "jscript.gperf" {"finally", TK_FINALLY}, #line 63 "jscript.gperf" {"var", TK_VAR}, #line 49 "jscript.gperf" {"protected", TK_PROTECTED}, #line 21 "jscript.gperf" {"class", TK_CLASS}, {""}, #line 15 "jscript.gperf" {"boolean", TK_BOOLEAN}, #line 62 "jscript.gperf" {"try", TK_TRY}, #line 18 "jscript.gperf" {"case", TK_CASE}, #line 19 "jscript.gperf" {"catch", TK_CATCH}, #line 38 "jscript.gperf" {"import", TK_IMPORT}, {""}, #line 34 "jscript.gperf" {"function", TK_FUNCTION}, #line 46 "jscript.gperf" {"null", TK_NULL}, #line 37 "jscript.gperf" {"implements", TK_IMPLEMENTS}, {""}, {""}, {""}, {""}, #line 54 "jscript.gperf" {"super", TK_SUPER}, {""}, #line 56 "jscript.gperf" {"synchronized", TK_SYNCHRONIZED}, {""}, #line 17 "jscript.gperf" {"byte", TK_BYTE}, #line 16 "jscript.gperf" {"break", TK_BREAK}, {""}, {""}, #line 14 "jscript.gperf" {"abstract", TK_ABSTRACT}, #line 35 "jscript.gperf" {"goto", TK_GOTO}, {""}, #line 50 "jscript.gperf" {"public", TK_PUBLIC}, {""}, {""}, #line 43 "jscript.gperf" {"long", TK_LONG} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = js_kw_hash (str, len); if (key <= MAX_HASH_VALUE && key >= 0) { register const char *s = wordlist[key].name; if (*str == *s && !strcmp (str + 1, s + 1)) return &wordlist[key]; } } return 0; } #line 67 "jscript.gperf" int main(void) { char *js_word[] = {"protected", "throws", "with", 0}; char *no_js_word[] = {"protectef", "throwd", "witp", 0}; char ** cp; cp = &js_word[0];  while (*cp != NULL)     {         struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp));         printf("%s is %s javascript expression!\n", *cp,p? "in":"not in");         printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in");         printf("%s->%d\n",p->name,p->token);         cp++;     } cp = &no_js_word[0]; while (*cp != NULL) { printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in"); cp++; }  return 0; } 我们主要关注两个函数:

static unsigned int js_kw_hash (str, len):根据传入的字符串生成哈希值

struct js_keyword * js_kw_lookup (str, len):根据传入的字符串进行查找操作

编译

运行:

详解参见:https://www.ibm.com/developerworks/cn/linux/l-gperf.html

转载请注明原文地址: https://www.6miu.com/read-31544.html

最新回复(0)