#!/usr/bin/env python
# Extract (subject | verb | object) relations from a text file using the pattern.en parser.

import sys
import string
import re
from pattern.en import parsetree
from pattern.en import pprint

# The path of the text file to analyse is the only required argument.
# sys.exit() with a string prints it to stderr and exits with status 1;
# unlike the bare exit() helper it does not depend on the site module.
if len(sys.argv) < 2:
    sys.exit('need filename on cmd line')
textname = sys.argv[1]

# Read the whole document in one go; the context manager guarantees the
# file handle is closed even if the read fails.
with open(textname, "r") as textf:
    text = textf.read()

# Strip control characters and non-ASCII bytes (Python 2 byte-string API).
# Deleted: 0x00-0x08, 0x0B, 0x0E-0x1F, 0x7F (DEL) and every byte >= 0x80.
# Kept:    tab (0x09), LF (0x0A), FF (0x0C) and CR (0x0D); these are all
#          normalised to single spaces by the steps below.
# The control-character slice ends at 32 (exclusive) so that 0x1F is covered,
# and the tail slice starts at 127 so that DEL is removed along with the
# high bytes.
nochangetable = string.maketrans('', '')
deletethese = nochangetable[:9] + nochangetable[11:12] + nochangetable[14:32] + nochangetable[127:]
text = text.translate(nochangetable, deletethese)
# Turn line feeds and form feeds into plain spaces.
spacetable = string.maketrans('\n\x0c', '  ')
text = text.translate(spacetable)
# Collapse every run of whitespace into a single space and trim both ends.
text = ' '.join(text.split())

# Parse the cleaned text with tokenisation, tagging, chunking, relation
# finding and lemmatisation all switched on.
pt = parsetree(text, tokenize=True, tags=True, chunks=True, relations=True, lemmata=True)

# Build one (subject, verb, object) tuple of strings for every chunk whose
# role is 'SBJ'. A verb or object that is missing (or otherwise falsy) is
# recorded as the placeholder '[none]'.
relations = []
for sentence in pt:
    for chunk in sentence.chunks:
        if chunk.role != 'SBJ':
            continue
        subject_text = chunk.string
        verb_text = chunk.verb.string if chunk.verb else '[none]'
        object_text = chunk.object.string if chunk.object else '[none]'
        relations.append((subject_text, verb_text, object_text))

for r in relations:
    for i,w in enumerate(r):
        if w != '[none]':
            if i > 0:
                print '|', w,
            else:
                print w,
    print
    
