NLTK10《Python自然语言处理》code09 建立基于特征的文法

xiaoxiao2021-02-28 92

建立基于特征的文法

# -*- coding: utf-8 -*-
# win10 python3.5.3/python3.6.1 nltk3.2.4
# "Natural Language Processing with Python", chapter 9:
# Building Feature Based Grammars
# pnlp09.py
#
# Walk-through script: every step prints its result.  The bare triple-quoted
# strings that follow the statements are not used by the program; they only
# record the console output expected from the preceding call.
# Requires the NLTK "book_grammars" data package to be installed.
import nltk
from nltk import load_parser

# ---------------------------------------------------------------------------
# 9.1 Grammatical features
# ---------------------------------------------------------------------------
kim = {'CAT': 'NP', 'ORTH': 'Kim', 'REF': 'k'}
chase = {'CAT': 'V', 'ORTH': 'chased', 'REL': 'chase'}
# kim and chase share some features: CAT (grammatical category) and
# ORTH (orthography, i.e. spelling).
# Each also has a semantically oriented feature: kim['REF'] is the referent
# of kim, chase['REL'] is the relation expressed by chase.
chase['AGT'] = 'sbj'  # sbj: subject (placeholder, overwritten below)
chase['PAT'] = 'obj'  # obj: object (placeholder, overwritten below)

sent = "Kim chased Lee"
tokens = sent.split()
lee = {'CAT': 'NP', 'ORTH': 'Lee', 'REF': 'l'}


def lex2fs(word):
    """Return the lexical feature dict whose 'ORTH' value equals *word*.

    Only the three entries kim, lee and chase are searched.  Returns None
    when no entry matches.
    """
    for fs in [kim, lee, chase]:
        if fs['ORTH'] == word:
            return fs
    return None


subj, verb, obj = lex2fs(tokens[0]), lex2fs(tokens[1]), lex2fs(tokens[2])
verb['AGT'] = subj['REF']  # agent of 'chase' is Kim
verb['PAT'] = obj['REF']   # patient of 'chase' is Lee
for k in ['ORTH', 'REL', 'AGT', 'PAT']:  # check featstruct of 'chase'
    print("%-5s => %s" % (k, verb[k]))
"""
ORTH  => chased
REL   => chase
AGT   => k
PAT   => l
"""

surprise = {'CAT': 'V', 'ORTH': 'surprised', 'REL': 'surprise',
            'SRC': 'sbj', 'EXP': 'obj'}

# Syntactic agreement
# Using attributes and constraints
# Example 9-1: example of a feature-based grammar
nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
"""
% start S
# ###################
# Grammar Productions
# ###################
# S expansion productions
S -> NP[NUM=?n] VP[NUM=?n]
# NP expansion productions
NP[NUM=?n] -> N[NUM=?n]
NP[NUM=?n] -> PropN[NUM=?n]
NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
NP[NUM=pl] -> N[NUM=pl]
# VP expansion productions
VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
# ###################
# Lexical Productions
# ###################
Det[NUM=sg] -> 'this' | 'every'
Det[NUM=pl] -> 'these' | 'all'
Det -> 'the' | 'some' | 'several'
PropN[NUM=sg]-> 'Kim' | 'Jody'
N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children'
IV[TENSE=pres, NUM=sg] -> 'disappears' | 'walks'
TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
IV[TENSE=pres, NUM=pl] -> 'disappear' | 'walk'
TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
IV[TENSE=past] -> 'disappeared' | 'walked'
TV[TENSE=past] -> 'saw' | 'liked'
"""

# Example 9-2: trace of a feature-based chart parser
tokens = 'Kim likes children'.split()
cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
trees = cp.parse(tokens)
"""
|.Kim .like.chil.|
Leaf Init Rule:
|[----]    .    .| [0:1] 'Kim'
|.    [----]    .| [1:2] 'likes'
|.    .    [----]| [2:3] 'children'
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
Feature Bottom Up Predict Combine Rule:
|[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
Feature Bottom Up Predict Combine Rule:
|.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
Feature Bottom Up Predict Combine Rule:
|.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
Feature Bottom Up Predict Combine Rule:
|.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
Feature Single Edge Fundamental Rule:
|.    [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] *
Feature Single Edge Fundamental Rule:
|[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] *
"""
for tree in trees:
    print(tree)
"""
(S[]
  (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
  (VP[NUM='sg', TENSE='pres']
    (TV[NUM='sg', TENSE='pres'] likes)
    (NP[NUM='pl'] (N[NUM='pl'] children))))
"""

# Terminology

# ---------------------------------------------------------------------------
# 9.2 Processing feature structures
# ---------------------------------------------------------------------------
fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
print(fs1)
"""
[ NUM   = 'sg'   ]
[ TENSE = 'past' ]
"""

fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
print(fs1['GND'])  # fem
fs1['CASE'] = 'acc'

fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
print(fs2)
"""
[       [ CASE = 'acc' ] ]
[ AGR = [ GND  = 'fem' ] ]
[       [ NUM  = 'pl'  ] ]
[       [ PER  = 3     ] ]
[                        ]
[ POS = 'N'              ]
"""
print(fs2['AGR'])
"""
[ CASE = 'acc' ]
[ GND  = 'fem' ]
[ NUM  = 'pl'  ]
[ PER  = 3     ]
"""
print(fs2['AGR']['PER'])  # 3

print(nltk.FeatStruct("[POS='N', AGR=[PER=3, NUM='pl', GND='fem']]"))
"""
[       [ GND = 'fem' ] ]
[ AGR = [ NUM = 'pl'  ] ]
[       [ PER = 3     ] ]
[                       ]
[ POS = 'N'             ]
"""

print(nltk.FeatStruct(name='Lee', telno='01 27 86 42 96', age=33))
"""
[ age   = 33               ]
[ name  = 'Lee'            ]
[ telno = '01 27 86 42 96' ]
"""

# Structure sharing: (1) tags a value, ->(1) points back to the same value.
print(nltk.FeatStruct("""[NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
                          SPOUSE=[NAME='Kim', ADDRESS->(1)]]"""))
"""
[ ADDRESS = (1) [ NUMBER = 74           ] ]
[               [ STREET = 'rue Pascal' ] ]
[                                         ]
[ NAME    = 'Lee'                         ]
[                                         ]
[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
[           [ NAME    = 'Kim' ]           ]
"""

print(nltk.FeatStruct("[A='a', B=(1)[C='c'], D->(1), E->(1)]"))
"""
[ A = 'a'             ]
[                     ]
[ B = (1) [ C = 'c' ] ]
[                     ]
[ D -> (1)            ]
[ E -> (1)            ]
"""

# Subsumption and unification
fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
fs2 = nltk.FeatStruct(CITY='Paris')
print(fs1.unify(fs2))
"""
[ CITY   = 'Paris'      ]
[ NUMBER = 74           ]
[ STREET = 'rue Pascal' ]
"""
print(fs2.unify(fs1))  # unification is symmetric
"""
[ CITY   = 'Paris'      ]
[ NUMBER = 74           ]
[ STREET = 'rue Pascal' ]
"""

# Unifying conflicting values fails and yields None.
fs0 = nltk.FeatStruct(A='a')
fs1 = nltk.FeatStruct(A='b')
fs2 = fs0.unify(fs1)
print(fs2)  # None

# Two equal but unshared addresses.  The spouse's feature is spelled NUMBER
# (upper case) so that both address structures really are identical.
fs0 = nltk.FeatStruct("""[NAME=Lee,
                          ADDRESS=[NUMBER=74, STREET='rue Pascal'],
                          SPOUSE=[NAME=Kim,
                                  ADDRESS=[NUMBER=74, STREET='rue Pascal']]]""")
print(fs0)
"""
[ ADDRESS = [ NUMBER = 74           ]               ]
[           [ STREET = 'rue Pascal' ]               ]
[                                                   ]
[ NAME    = 'Lee'                                   ]
[                                                   ]
[           [ ADDRESS = [ NUMBER = 74           ] ] ]
[ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
[           [                                     ] ]
[           [ NAME    = 'Kim'                     ] ]
"""

# Without structure sharing, CITY is added to the spouse's address only.
fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]")
print(fs1.unify(fs0))
"""
[ ADDRESS = [ NUMBER = 74           ]               ]
[           [ STREET = 'rue Pascal' ]               ]
[                                                   ]
[ NAME    = 'Lee'                                   ]
[                                                   ]
[           [           [ CITY   = 'Paris'      ] ] ]
[           [ ADDRESS = [ NUMBER = 74           ] ] ]
[ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
[           [                                     ] ]
[           [ NAME    = 'Kim'                     ] ]
"""

# With structure sharing, the CITY added through SPOUSE.ADDRESS shows up in
# the single shared address structure.
fs2 = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
                          SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
print(fs1.unify(fs2))
"""
[               [ CITY   = 'Paris'      ] ]
[ ADDRESS = (1) [ NUMBER = 74           ] ]
[               [ STREET = 'rue Pascal' ] ]
[                                         ]
[ NAME    = 'Lee'                         ]
[                                         ]
[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
[           [ NAME    = 'Kim' ]           ]
"""

# Structure sharing expressed with variables (?x).
fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
print(fs2)
"""
[ ADDRESS1 = ?x ]
[ ADDRESS2 = ?x ]
"""
print(fs2.unify(fs1))
"""
[ ADDRESS1 = (1) [ NUMBER = 74           ] ]
[                [ STREET = 'rue Pascal' ] ]
[                                          ]
[ ADDRESS2 -> (1)                          ]
"""

# ---------------------------------------------------------------------------
# 9.3 Extending a feature-based grammar
# ---------------------------------------------------------------------------
# Subcategorization
# Heads revisited
# Auxiliary verbs and inversion
# Unbounded dependency constructions
# Example 9-3: grammar with productions for inverted clauses and
# long-distance dependencies, making use of slash categories
nltk.data.show_cfg('grammars/book_grammars/feat1.fcfg')
"""
% start S
# ###################
# Grammar Productions
# ###################
S[-INV] -> NP VP
S[-INV]/?x -> NP VP/?x
S[-INV] -> NP S/NP
S[-INV] -> Adv[+NEG] S[+INV]
S[+INV] -> V[+AUX] NP VP
S[+INV]/?x -> V[+AUX] NP VP/?x
SBar -> Comp S[-INV]
SBar/?x -> Comp S[-INV]/?x
VP -> V[SUBCAT=intrans, -AUX]
VP -> V[SUBCAT=trans, -AUX] NP
VP/?x -> V[SUBCAT=trans, -AUX] NP/?x
VP -> V[SUBCAT=clause, -AUX] SBar
VP/?x -> V[SUBCAT=clause, -AUX] SBar/?x
VP -> V[+AUX] VP
VP/?x -> V[+AUX] VP/?x
# ###################
# Lexical Productions
# ###################
V[SUBCAT=intrans, -AUX] -> 'walk' | 'sing'
V[SUBCAT=trans, -AUX] -> 'see' | 'like'
V[SUBCAT=clause, -AUX] -> 'say' | 'claim'
V[+AUX] -> 'do' | 'can'
NP[-WH] -> 'you' | 'cats'
NP[+WH] -> 'who'
Adv[+NEG] -> 'rarely' | 'never'
NP/NP ->
Comp -> 'that'
"""

tokens = 'who do you claim that you like'.split()
cp = load_parser('grammars/book_grammars/feat1.fcfg')
for tree in cp.parse(tokens):
    print(tree)
"""
(S[-INV]
  (NP[+WH] who)
  (S[+INV]/NP[]
    (V[+AUX] do)
    (NP[-WH] you)
    (VP[]/NP[]
      (V[-AUX, SUBCAT='clause'] claim)
      (SBar[]/NP[]
        (Comp[] that)
        (S[-INV]/NP[]
          (NP[-WH] you)
          (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] )))))))
"""

tokens = 'you claim that you like cats'.split()
for tree in cp.parse(tokens):
    print(tree)
"""
(S[-INV]
  (NP[-WH] you)
  (VP[]
    (V[-AUX, SUBCAT='clause'] claim)
    (SBar[]
      (Comp[] that)
      (S[-INV]
        (NP[-WH] you)
        (VP[] (V[-AUX, SUBCAT='trans'] like) (NP[-WH] cats))))))
"""

tokens = 'rarely do you sing'.split()
for tree in cp.parse(tokens):
    print(tree)
"""
(S[-INV]
  (Adv[+NEG] rarely)
  (S[+INV]
    (V[+AUX] do)
    (NP[-WH] you)
    (VP[] (V[-AUX, SUBCAT='intrans'] sing))))
"""

# Example 9-4: example of a feature-based grammar (German case and agreement)
nltk.data.show_cfg('grammars/book_grammars/german.fcfg')
"""
% start S
# Grammar Productions
S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
...
"""

tokens = 'ich folge den Katzen'.split()
cp = load_parser('grammars/book_grammars/german.fcfg')
for tree in cp.parse(tokens):
    print(tree)
"""
(S[]
  (NP[AGR=[NUM='sg', PER=1], CASE='nom']
    (PRO[AGR=[NUM='sg', PER=1], CASE='nom'] ich))
  (VP[AGR=[NUM='sg', PER=1]]
    (TV[AGR=[NUM='sg', PER=1], OBJCASE='dat'] folge)
    (NP[AGR=[GND='fem', NUM='pl', PER=3], CASE='dat']
      (Det[AGR=[NUM='pl', PER=3], CASE='dat'] den)
      (N[AGR=[GND='fem', NUM='pl', PER=3]] Katzen))))
"""

# Ungrammatical input: the recorded trace ends without any edge spanning the
# whole sentence, so the loop below prints no tree.
tokens = 'ich folge den Katze'.split()
cp = load_parser('grammars/book_grammars/german.fcfg', trace=2)
for tree in cp.parse(tokens):
    print(tree)
"""
|.ich.fol.den.Kat.|
Leaf Init Rule:
|[---]   .   .   .| [0:1] 'ich'
|.   [---]   .   .| [1:2] 'folge'
|.   .   [---]   .| [2:3] 'den'
|.   .   .   [---]| [3:4] 'Katze'
Feature Bottom Up Predict Combine Rule:
|[---]   .   .   .| [0:1] PRO[AGR=[NUM='sg', PER=1], CASE='nom'] -> 'ich' *
Feature Bottom Up Predict Combine Rule:
|[---]   .   .   .| [0:1] NP[AGR=[NUM='sg', PER=1], CASE='nom'] -> PRO[AGR=[NUM='sg', PER=1], CASE='nom'] *
Feature Bottom Up Predict Combine Rule:
|[--->   .   .   .| [0:1] S[] -> NP[AGR=?a, CASE='nom'] * VP[AGR=?a] {?a: [NUM='sg', PER=1]}
Feature Bottom Up Predict Combine Rule:
|.   [---]   .   .| [1:2] TV[AGR=[NUM='sg', PER=1], OBJCASE='dat'] -> 'folge' *
Feature Bottom Up Predict Combine Rule:
|.   [--->   .   .| [1:2] VP[AGR=?a] -> TV[AGR=?a, OBJCASE=?c] * NP[CASE=?c] {?a: [NUM='sg', PER=1], ?c: 'dat'}
Feature Bottom Up Predict Combine Rule:
|.   .   [---]   .| [2:3] Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] -> 'den' *
|.   .   [---]   .| [2:3] Det[AGR=[NUM='pl', PER=3], CASE='dat'] -> 'den' *
Feature Bottom Up Predict Combine Rule:
|.   .   [--->   .| [2:3] NP[AGR=?a, CASE=?c] -> Det[AGR=?a, CASE=?c] * N[AGR=?a, CASE=?c] {?a: [NUM='pl', PER=3], ?c: 'dat'}
Feature Bottom Up Predict Combine Rule:
|.   .   [--->   .| [2:3] NP[AGR=?a, CASE=?c] -> Det[AGR=?a, CASE=?c] * N[AGR=?a, CASE=?c] {?a: [GND='masc', NUM='sg', PER=3], ?c: 'acc'}
Feature Bottom Up Predict Combine Rule:
|.   .   .   [---]| [3:4] N[AGR=[GND='fem', NUM='sg', PER=3]] -> 'Katze' *
"""

转载请注明原文地址: https://www.6miu.com/read-44502.html

技术

最新回复(0)