# Demonstration of regular-expression chunking with nltk_lite:
# build a tagged sentence, then apply several chunk/chink rule sets to it
# and print the resulting trees.
#
# NOTE(review): this file had been collapsed onto two physical lines and the
# angle-bracket tag patterns had been stripped (only a bare '+' or '*' was
# left in most rules).  The patterns below were reconstructed from the
# surviving rule descriptions -- confirm them against the original script.
from nltk_lite import tag, chunk

# Tagged input sentence in word/TAG notation, converted to a tree.
sent = chunk.tagstr2tree(
    "the/DT little/JJ cat/NN sat/VBD on/IN the/DT mat/NN in/IN New/NNP York/NNP")
# Single-argument print(...) behaves the same as the Python 2 print statement.
print(sent)

# --- Example 1: a single chunk rule -----------------------------------------
# Reconstructed pattern: only a '*' quantifier survived in the damaged source;
# a determiner, any number of adjectives, then a noun is assumed -- TODO confirm.
rule1 = chunk.ChunkRule('<DT><JJ>*<NN>', 'Chunk sequences of DT, JJ and NN')
chunkparser = chunk.RegexpChunk([rule1], chunk_node='NP', top_node='S')
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)

# --- Example 2: add a second chunk rule for proper nouns --------------------
# Reconstructed pattern (only '+' survived) -- TODO confirm.
rule2 = chunk.ChunkRule('<NNP>+', "Chunk proper nouns")
chunkparser = chunk.RegexpChunk([rule1, rule2], chunk_node='NP', top_node='S')
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)
chunk_tree.draw()

# --- Example 3: chunk everything, then chink out verbs and prepositions -----
chunkall_rule = chunk.ChunkRule('<.*>+', 'Chunk everything')
# Reconstructed pattern (only '+' survived) -- TODO confirm.
remove_rule = chunk.ChinkRule('<VBD|IN>+', 'Remove sequences of VBD and IN')
chunkparser = chunk.RegexpChunk(
    [chunkall_rule, remove_rule], chunk_node='NP', top_node='S')
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)

# --- Example 4: a chunk rule followed by a chink rule -----------------------
# Both patterns are reconstructed from their descriptions (only '+' survived
# in each) -- TODO confirm.
chink_rule = chunk.ChinkRule('<NN|DT>+', 'Chink sequences of NN and DT')
chunk_rule = chunk.ChunkRule('<NN|JJ|DT>+', 'Chunk sequences of NN, JJ, and DT')
# The chunk rule is listed before the chink rule, as in the original.
chunkparser = chunk.RegexpChunk(
    [chunk_rule, chink_rule], chunk_node='NP', top_node='S')
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)