from nltk_lite import tag, chunk
# Turn a word/TAG string into the structure that the chunk parsers below
# take as input.
sent = chunk.tagstr2tree("the/DT little/JJ cat/NN sat/VBD on/IN the/DT mat/NN in/IN New/NNP York/NNP")
print(sent)

# Demo 1: a single chunk rule producing NP chunks under an S root.
# NOTE(review): the tag pattern had been stripped from this literal (the
# string was left unterminated across two lines).  '<DT><JJ>*<NN>' is
# reconstructed from the rule description and the surviving '*'; confirm
# against the original tutorial.
rule1 = chunk.ChunkRule('<DT><JJ>*<NN>', 'Chunk sequences of DT, JJ and NN')
chunkparser = chunk.RegexpChunk([rule1], chunk_node='NP', top_node='S')
# trace=1 presumably makes the parser report what each rule did -- see the
# nltk_lite chunk documentation.
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)
# Demo 2: add a second rule after rule1 so proper nouns are chunked as well.
# NOTE(review): the tag pattern had been stripped, leaving a bare '+' with
# nothing to repeat.  '<NNP>+' is reconstructed from the description and the
# NNP tags in the sample sentence; confirm against the original tutorial.
rule2 = chunk.ChunkRule('<NNP>+', "Chunk proper nouns")
chunkparser = chunk.RegexpChunk([rule1, rule2], chunk_node='NP', top_node='S')
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)
# Display the resulting tree (presumably opens a graphical window).
chunk_tree.draw()
# Demo 3: chunk the whole sentence first, then chink (remove from the chunk)
# the verb/preposition runs.
chunkall_rule = chunk.ChunkRule('<.*>+', 'Chunk everything')
# NOTE(review): the tag pattern had been stripped, leaving a bare '+'.
# '<VBD|IN>+' is reconstructed from the rule description; confirm against the
# original tutorial.
remove_rule = chunk.ChinkRule('<VBD|IN>+', 'Remove sequences of VBD and IN')
chunkparser = chunk.RegexpChunk([chunkall_rule, remove_rule], chunk_node='NP', top_node='S')
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)
# Demo 4: a chunk rule followed by a chink rule over an overlapping tag set.
# The parser is given the chunk rule first and the chink rule second.
# NOTE(review): both tag patterns had been stripped, leaving bare '+'.
# They are reconstructed from the rule descriptions; confirm against the
# original tutorial.
chink_rule = chunk.ChinkRule('<NN|DT>+',
                             'Chink sequences of NN and DT')
chunk_rule = chunk.ChunkRule('<NN|JJ|DT>+',
                             'Chunk sequences of NN, JJ, and DT')
chunkparser = chunk.RegexpChunk([chunk_rule, chink_rule],
                                chunk_node='NP', top_node='S')
chunk_tree = chunkparser.parse(sent, trace=1)
print(chunk_tree)