Home Technical Overview Tanl Pipeline API

NER pipeline

Python source code for the pipeline used by the NER Service.


# Tanl pipeline
# Each stage is built from its model/resource and chained to the previous
# stage's output through .pipe(); `text` is the input and must be defined
# by the caller before this snippet runs.
# NOTE(review): the Parser pipeline constructs this first stage as
# SentenceSplitter('italian.splitta/') -- confirm which class name is current.
p1 = Splitta('italian.splitta/').pipe([text])
p2 = Tokenizer().pipe(p1)
p3 = HmmTagger('italian.hmm').pipe(p2)
p4 = MorphSplitter('it').pipe(p3)
p5 = NerLR('italian.LR').pipe(p4)

# Collect entities
# `res` accumulates one "FORM<TAB>NETAG" line per tagged token; it is
# initialised here so the snippet is runnable on its own.
res = ""
for s in p5:
    for t in s:
        cur = t['NETAG']
        form = t['FORM']
        # Tokens tagged 'O' are skipped (presumably "outside any entity").
        if cur != 'O':
            res += "%s\t%s\n" % (form, cur)

Parser pipeline

Python source code for the pipeline used by the Parse Service.

# Create corpus
# The corpus object is used below only to render each parsed sentence as
# text via c.toString(); "CoNLL" presumably selects the output format.
c = Corpus.create("it", "CoNLL")

# Tanl pipeline
# Each stage is chained to the previous stage's output through .pipe();
# `text` is the input and must be defined by the caller.
p1 = SentenceSplitter('italian.splitta/').pipe([text])
p2 = Tokenizer().pipe(p1)
p3 = HmmTagger('italian.hmm').pipe(p2)
p4 = MorphSplitter('it').pipe(p3)
p5 = Parser.create('italian.MLP').pipe(p4)

# Parse text
# Print every sentence followed by one blank separator line.
# Single-argument parenthesised print is used so the snippet produces the
# same output on Python 2 and is also valid on Python 3; print("") rather
# than print() because the latter would print "()" on Python 2.
for s in p5:
    print(c.toString(s))
    print("")