from qt import QAction, QApplication, QFileDialog, QFont, QFontDialog, \ QHBoxLayout, QIconSet, QKeySequence, QMainWindow, QMessageBox, QPopupMenu, \ QSettings, QString, Qt, QWidget import codecs import sys import re import os import shutil import types import ag from getopt import getopt from sets import Set from time import time import tempfile from xml.dom.minidom import parse, getDOMImplementation ###### import icons from scrollableText import TokenText, TokenDetailBox from objects import * from preferences import * from twisted.plugin import getPlugins import tokenizer import myparser import plugins ###### class tmaConfig: """ Config file reading and writing code shamelessly stolen from Kazuaki Maeda's ACE annotation tool """ def __init__(self, initialConfig = {}, domain="ldc.upenn.edu",product="Unnamed App"): self.domain=domain if product.endswith("/"): self.product=product else: self.product="%s/" % product self.config = {} self.default = { "Geometries/MainWindowX" : 0, "Geometries/MainWindowY" : 0, "Geometries/MainWindowWidth" : 1100, "Geometries/MainWindowHeight" : 900, "Fonts/Text/Family": "ClearlyU", "Fonts/Text/PointSize" : 12, "Fonts/Text/Weight" : 50, "Fonts/Text/Italic" : 0, "Fonts/Text/Bold" : 0, "Fonts/Text/Script" : "Unicode", "Colors/Highlight/NonParsing": "#FF0000", "Colors/Highlight/Ambiguous": "#FFFF33", "Colors/Highlight/Selected": "#FF00FF", "Colors/Highlight/Unambiguous": "#90EE90", "Colors/Highlight/Unparsed": "#FF0000", "Plugins/Tokenizer/SelectedTokenizer" : "", "Plugins/Parser/SelectedParser" : "", "Display/ShowParses" : True, "Display/ShowMorphemes" : False, "Display/ShowGlosses": False, "Display/TextDirection": "Left-to-Right", "Display/Spacing/Lines": 5, "Display/Spacing/Paragraphs": 10, "Display/Spacing/Words": 5, "Encoding" : "utf-8", "RecentlyOpenedTextFiles" : "", "RecentlyOpenedAnnotationFiles" : "", "TextDirectory": "./", "AnnotationDirectory": "./", "CurrentAnnotationFile": "", "Notes/Labels":"" } self.config = self.default def valueOf(self,key,default=""): if self.config.has_key(key): return self.config[key] else: self.readSetting(key,default=default) return self.config[key] def setValue(self,key,value): self.config[key]=value self.writeSettings() def readSettings(self, settings=None, filename=None): if settings is None: settings = QSettings() if filename: dirname=os.path.dirname(os.path.abspath(filename)) settings.insertSearchPath(QSettings.Windows,dirname) settings.insertSearchPath(QSettings.Unix,dirname) settings.insertSearchPath(QSettings.Mac,dirname) else: settings.setPath(self.domain,self.product) for k in self.config.keys(): self.readSetting(k, settings=settings) return def readSetting(self, k, settings=None, filename=None,default=""): if settings is None: settings = QSettings() settings.setPath(self.domain,self.product) if self.config.has_key(k): v = self.config[k] else: v = "" if self.default.has_key(k): v=self.default[k] else: v=default if isinstance(v, types.BooleanType): f=settings.readBoolEntry elif isinstance(v, types.IntType): f=settings.readNumEntry elif isinstance(v, types.FloatType): f=settings.readDoubleEntry else: f=settings.readEntry self.config[k],f = f("/%s/%s" % (self.product,k),v) if isinstance(self.config[k],QString): self.config[k]=str(self.config[k]) def loadXML(self, filename): myDom=parse(filename) for node in myDom.getElementsByTagName("setting"): attr=node.getAttribute("attribute") val=node.getAttribute("value") self.setValue(attr,val) def writeXML(self, filename): impl=getDOMImplementation() #dtd=impl.createDocumentType("response","",dtdFile) dtd=None myDom=impl.createDocument(None,"settings",dtd) settingsNode=myDom.documentElement attributes=self.config.keys() attributes.sort() for attr in attributes: settingNode=myDom.createElement("setting") settingNode.setAttribute("attribute",attr) settingNode.setAttribute("value","%s" % self.config[attr]) settingsNode.appendChild(settingNode) fp=open(filename,"w") fp.write(myDom.toprettyxml()) fp.close() def writeSettings(self): settings = QSettings() settings.setPath(self.domain,self.product) for k in self.config.keys(): settings.writeEntry("/%s/%s" % (self.product,k), self.config[k]) def restoreDefaults(self): self.config = {} settings = QSettings() for k in self.default.keys(): self.config[k] = self.default[k] self.writeSettings() class tptMain(QWidget): def __init__(self, displayParent, master, clipboard): QWidget.__init__(self, displayParent, "main", Qt.WDestructiveClose) self.displayParent=displayParent self.master=master self.clipboard = clipboard self.config=self.master.config self.layout = QHBoxLayout(self) self.layout.setAutoAdd(1) self.tokenDisplay=TokenText(self, self, self.master.config, self.controlEvent) self.tokenDetailDisplay=TokenDetailBox(self, self,self.controlEvent) self.tokenizer=None self.parser=None self.annotationFilename=None self.AGSetId=None self.noteTypeDialog=None self.clear() self.setFont() self.setTypes() def undo(self): pass def redo(self): pass def copy(self): text=self.tokenDisplay.selectedText() if text: self.clipboard.setText(text) def paste(self): text=self.clipboard.getText() self.tokenDisplay.putText(text) def pastePlain(self): text=self.clipboard.getText() self.tokenDisplay.putPlainText(text) def find(self): pass def findAgain(self): pass def findPrevious(self): pass def displayChanged(self, changeDict): if [t for t in changeDict if changeDict[t]] and self.fullText: self.tokenDisplay.refreshDisplay() def notesChanged(self, changeDict): if [t for t in changeDict if changeDict[t]] and self.fullText: types=self.config.valueOf("Notes/Labels").split(";;") toDelete=[] for note in self.problemNote2Id.iterkeys(): if note in toDelete: continue pType=note.getType() if pType not in types: out=self.getNewType(pType) if out: newType,applyToAll=out else: self.fileClose() if applyToAll: toChange=[x for x in self.problemNote2Id.iterkeys() if x.getType() == pType] else: toChange=[note,] if newType==pType: types.append(pType) self.config.setValue("Notes/Labels",";;".join(types)) elif not(newType): toDelete += toChange else: for x in toChange: x.setType(newType) for note in toDelete: note.delete() self.setTypes() self.tokenDetailDisplay.showToken(self.currentToken) def getNewType(self, oldType): if not(self.noteTypeDialog): self.noteTypeDialog=NoteTypeDialog(self) ok=self.noteTypeDialog.handleType(oldType, self.config.valueOf("Notes/Labels").split(";;")) if ok: return (self.noteTypeDialog_newType, self.noteTypeDialog_applyToAll) else: return None def setTypes(self): types=self.config.valueOf("Notes/Labels").split(";;") self.tokenDetailDisplay.noteList.setTypes(types) def pluginsChanged(self, changeDict): if changeDict["Tokenizer"] and self.fullText: ## The selected tokenizer has changed. Re-tokenize the text? ok=QMessageBox.warning(self,"Tokenizer Changed", "The selected tokenizer has been changed. \n" +\ "Tokenization information may be out of synch. \n" +\ "Re-run tokenization of all text now?", QMessageBox.Ok + QMessageBox.Default, QMessageBox.Cancel + QMessageBox.Escape) if ok == QMessageBox.Ok: self.runTokenizer() if changeDict["Parser"] and self.fullText: ok=QMessageBox.warning(self,"Parser Changed", "The selected parser has been changed. \n" +\ "Parsing information may be out of synch. \n" +\ "Re-run parser over all tokens now?", QMessageBox.Ok + QMessageBox.Default, QMessageBox.Cancel + QMessageBox.Escape) if ok==QMessageBox.Ok: self.runParser() return def editPreferences(self): funcDict={"plugins":self.pluginsChanged, "display":self.displayChanged, "notes":self.notesChanged} PreferencesDialog(self, self, self.config, funcDict).exec_loop() def editFont(self): (f,ok)=QFontDialog.getFont(self.textFont) if ok: self.setFont(f) def modifyToken(self): if not(self.currentToken): return def mergeTokens(self, t1, t2): oldEnd=t1.getEndOffset() newStart=t1.getStartOffset() newEnd=t2.getEndOffset() text=self.fullText[t1.getStartOffset():t2.getEndOffset()] self.deleteCurrentToken(token=t2) for p in t1.getParses(): del self.id2Parse[p.annId] del self.parse2Id[p] t1.deleteParses() self.setTokenText(t1,text) self.tokenDisplay.refreshDisplay() self.selectToken(t1) def splitToken(self, token, splitPoint): if not(token): token=self.currentToken if not(token): return if not(splitPoint): return ## Create a new Token newStart=token.getStartOffset() + splitPoint newEnd=token.getEndOffset() newParsing=token.isParsing() newToken=Token(AGId=self.AGId, startOffset=newStart, endOffset=newEnd, textObject=self.textObject, parsing=newParsing) id=newToken.annId self.token2Id[newToken]=id self.id2Token[id]=newToken self.tokenDisplay.insertToken(newToken, previous=token) ## Adjust the end offset of the existing token token.setEndOffset(token.getStartOffset() + splitPoint) self.selectToken(token) def deleteCurrentToken(self, token=None): if not(token): token=self.currentToken if not(token): return self.selectToken(token) self.setTokenText(self.currentToken,"") self.tokenDisplay.removeToken(self.currentToken) for p in token.getParses(): del self.id2Parse[p.annId] del self.parse2Id[p] id=self.token2Id[token] del self.token2Id[token] del self.id2Token[id] token.delete() self.currentToken=None def setTokenText(self, token,text=""): if not(token): return oldStart=token.getStartOffset() oldEnd=token.getEndOffset() # If a token is being deleted, delete any space between it and the # next valid token. if not(text): followers=[x for x in self.token2Id if x.getStartOffset() >= oldEnd] if followers: followers.sort(lambda a,b:cmp(a.getStartOffset(),b.getStartOffset())) oldEnd=followers[0].getStartOffset() ## Modify the source text file print self.master.Writer print self.encoding print self.textfile try: oldText=self.fullText newText=oldText[:oldStart] + text + oldText[oldEnd:] fp=open(self.textfile,"w") self.master.Writer[self.encoding](fp).write(newText) fp.close() self.fullText=newText self.textObject.setText(self.fullText) except Exception, e: QMessageBox.warning(self,"Unable to make changes to the source text file.\n", "The following error occured " +\ "when opening the text file for writing:\n%s\n\n" % e +\ "Please check the error and try again.", QMessageBox.Ok + QMessageBox.Default) oldSize=oldEnd-oldStart newSize=len(text) delta=newSize-oldSize ## Update the offsets for this token token.setEndOffset(oldEnd+delta) ## Update the offsets for all following tokens for t in [x for x in self.token2Id if x.getStartOffset() > oldStart]: t.shift(delta) def parseAllTokens(self): self.parseTokens([x for x in self.token2Id.keys() if x.isParsing()]) def parseTokens(self, tokenList): if [x for x in tokenList if x.parses]: ok=QMessageBox.warning(self,"Annoations to be Deleted", "Running the parser will delete all previous " +\ "parses.\n\nThis action cannot be undone.", QMessageBox.Ok + QMessageBox.Default, QMessageBox.Cancel + QMessageBox.Escape) if ok <> QMessageBox.Ok: return self.clearAllParses(tokenList) selectedParser=self.config.valueOf("Plugins/Parser/SelectedParser") potentialParsers=getPlugins(myparser.ParserPlugin,plugins) selected=[x for x in potentialParsers if x.displayName==selectedParser] if selected: self.parser=selected[0] for attr in self.parser.preferences: if self.parser.preferences[attr]=="string": d="" elif self.parser.preferences[attr]=="int": d=0 elif self.parser.preferences[attr]=="list":d="" toget="Plugins/%s/%s" % (self.parser.displayName, attr) self.parser.values[attr]=self.config.valueOf(toget,default=d) self.parser.initialize() self.parser.start() else: self.parser=None wordlist=Set(t.getText() for t in tokenList) try: parseDict=self.parser.parse(wordlist) except Exception, e: parseDict=self.parser.parse(wordlist) QMessageBox.warning(self,"Running the parser failed","Unknown Error.\n\n" +\ "Please check the error and try again.", QMessageBox.Ok + QMessageBox.Default) return for t in tokenList: try: potentials=parseDict[t.getText()] except: potentials=[] for pot in potentials: p=Parse(AGId=self.AGId, gloss=pot["gloss"],underlyingForm=pot["morpheme"],token=t) self.parse2Id[p]=p.annId self.id2Parse[p.annId]=p self.tokenDisplay.refreshDisplay() if self.currentToken in tokenList: self.selectToken(self.currentToken) def parseSelection(self): self.main.parseSelectedTokens() def controlEvent(self, e): type=e["type"] name=e["name"] if type=="token": if name=="selected": token=e["token"] self.selectToken(token) elif name=="textChanged": text=e["text"] self.setTokenText(self.currentToken,text) self.tokenDisplay.refreshDisplay() elif name=="mergePrevious": self.mergePrevious() elif name=="mergeNext": self.mergeNext() elif name=="delete": self.deleteCurrentToken() elif name=="parsableChanged": val=e["value"] if not(self.currentToken): return self.currentToken.setParsing(val) self.tokenDisplay.refreshDisplay(self.currentToken) elif name=="parseChanged": parse=e["parse"] if not(self.currentToken): return self.currentToken.setCurrentParse(parse) self.tokenDisplay.refreshDisplay() elif name=="parseCurrent": self.parseTokens([self.currentToken,]) elif name=="splitRequested": token=self.currentToken p=e["splitPoint"] self.splitToken(self.currentToken,p) elif name=="newProblemNote": text=e["text"] pType=e["pType"] n=ProblemNote(AGId=self.AGId,token=self.currentToken,text=text,pType=pType) self.id2ProblemNote[n.annId]=n self.problemNote2Id[n]=n.annId return n def mergeNext(self): this=self.currentToken try: others=[x for x in self.token2Id if x.getStartOffset()>=this.getEndOffset()] others.sort(lambda a,b:cmp(a.getStartOffset(),b.getStartOffset())) other=others[0] except: other=None if other: self.mergeTokens(this, other) def mergePrevious(self): this=self.currentToken try: others=[x for x in self.token2Id if x.getEndOffset()<=this.getStartOffset()] others.sort(lambda a,b:cmp(a.getStartOffset(),b.getStartOffset())) other=others[-1] except: other=None if other: self.mergeTokens(other, this) def selectToken(self, t): self.tokenDisplay.selectToken(t) if t: isFirst=not([x for x in self.token2Id if x.getStartOffset() < t.getStartOffset()]) else: isFirst=1 if t: isLast=not([x for x in self.token2Id if x.getStartOffset() > t.getStartOffset()]) else: isLast=1 self.tokenDetailDisplay.showToken(t, isFirst, isLast) self.currentToken=t def clearCurrentParses(self): if not(self.currentToken): return self.clearAllParses([self.currentToken,]) def clearAllParses(self, tokenList=None): if not(tokenList): tokenList=self.token2Id.keys() for t in tokenList: for parse in [x for x in t.parses]: parse.delete() self.parse2Id={} self.id2Parse={} def clear(self): self.id2Token={} self.token2Id={} self.id2Parse={} self.parse2Id={} self.id2ProblemNote={} self.problemNote2Id={} self.fullText=None self.encoding=self.master.config.valueOf("Encoding") self.showParses=self.master.config.valueOf("ShowParses") self.showMorphemes=self.master.config.valueOf("ShowMorphemes") self.showGlosses=self.master.config.valueOf("ShowGlosses") self.currentToken=None self.tokenDisplay.clear() self.tokenDetailDisplay.clear() def setFont(self, f=None): if not(f): family=self.master.config.valueOf("Fonts/Text/Family") pointSize=self.master.config.valueOf("Fonts/Text/PointSize") weight=self.master.config.valueOf("Fonts/Text/Weight") italic=self.master.config.valueOf("Fonts/Text/Italic") bold=self.master.config.valueOf("Fonts/Text/Bold") f=QFont(family, pointSize, weight, italic) else: family=f.family() pointSize=f.pointSize() weight=f.weight() italic=f.italic() bold=f.bold() config=self.master.config config.setValue("Fonts/Text/Family",family) config.setValue("Fonts/Text/PointSize",pointSize) config.setValue("Fonts/Text/Weight",weight) config.setValue("Fonts/Text/Italic",italic) config.setValue("Fonts/Text/Bold",bold) self.textFont=f self.tokenDisplay.setFont(f) self.tokenDetailDisplay.setFont(f) def unInitAG(self): self.AGId=None self.docid=None if self.AGSetId: ag.DeleteAGSet(self.AGSetId) self.AGSetId=None self.modified=0 def initAG(self, docid): self.docid=os.path.normcase(os.path.basename(docid)) if self.AGSetId: self.unInitAG() label = str(time()) self.label=label self.AGSetId=ag.CreateAGSet(self.docid) ag.SetFeature(self.AGSetId, "author", "") ag.SetFeature(self.AGSetId, "encoding", self.encoding) ag.SetFeature(self.AGSetId, "source", "") ag.SetFeature(self.AGSetId, "type", "text") ag.SetFeature(self.AGSetId, "uri", docid) ag.SetFeature(self.AGSetId, "version", "2.0") self.timelineId=ag.CreateTimeline(self.AGSetId) self.signalId=ag.CreateSignal(self.timelineId, "URI", "text", "NULL", "unknown", "char", "NULL") self.AGId=ag.CreateAG(self.AGSetId, self.timelineId) ag.SetFeature(self.AGId, "docid", self.docid) self.modified=0 return True def saveAG(self, filename=None): if filename is None: dir=self.master.config.valueOf("AnnotationDirectory") fn=QFileDialog.getSaveFileName(dir,"All AG xml Files (*.ag.xml);; All Files (*)",self,"Save Annotation File","Save Annotation File",None,0) if fn.isEmpty(): return filename=os.path.abspath(str(fn.local8Bit())) dir=os.path.dirname(filename) self.master.config.setValue("AnnotationDirectory", dir) self.annotationFilename=str(filename) lines=ag.toXML(self.AGSetId).split("\n") if not(lines): return label=self.label excludeLine="uri" f=open(filename,"w") for line in lines: if excludeLine not in line: line=re.sub(label,"",line) f.write("%s\n" % line) f.close() self.modified=0 def closeFile(self): self.clear() self.unInitAG() self.close() def getEncoding(self): return self.encoding def setEncoding(self, e): self.encoding=e def bindLoadAnnotationFile(self, fname=None): if fname and os.path.exists(fname): fn=fname else: dir=self.master.config.valueOf("AnnotationDirectory") fn=QFileDialog.getOpenFileName(dir, "All AG xml Files (*.ag.xml);; All Files (*)", self, "Open Annotation File", "Open Annotation File", None, 0) if fn.isEmpty(): self.close() return fn=os.path.normcase(str(fn.local8Bit())) dir=os.path.dirname(fn) self.uriBase=dir self.master.config.setValue("AnnotationDirectory", dir) self.loadAnnotationFile(fn) def loadAnnotationFile(self, filename=None): if not(filename): self.bindLoadAnnotationFile() return self.unInitAG() agIds=self.loadConvertedAG(filename) self.AGId=agIds[0] self.AGSetId=ag.GetAGSetId(self.AGId) self.annotationFilename=filename self.loadTextFile(ag.GetFeature(self.AGSetId,"uri"),new=0) self.loadObjectsFromAG() tokenList=[] s=0 for p in self.fullText.split("\n\n"): e=s+len(p) tokenList.append([x for x in self.token2Id if x.getStartOffset() >= s and x.getEndOffset() <= e]) tokenList[-1].sort() s=e self.tokenDisplay.showTokens(tokenList) def loadConvertedAG(self, filename): label = str(time()) self.label=label agIds=ag.Load("AG", filename, "", {}, {"DTDvalidation": "false"}) id=ag.GetAGSetId(agIds[0]) tmpfile=tempfile.mktemp(label) f = open(filename,"r") f2 = open(tmpfile,"w") lines=f.xreadlines() excludeLine="uri" for line in lines: if excludeLine not in line: line=line.replace(id,"%s%s" % (id,label)) f2.write(line) f.close() f2.close() ag.DeleteAGSet(id) agSet=ag.Load("AG",tmpfile, "", {}, {"DTDvalidation": "false"}) os.unlink(tmpfile) return agSet def exportTextFile(self): fn=QFileDialog.getSaveFileName(dir, "Text Files (*.txt);; All Files (*)", self, "Export Text File", "Export Text File", None, 0) if fn.isEmpty(): self.close() return fn=str(fn.local8Bit()) if not(fn): return fp=open(fn) fp.write(self.toPrettyText()) fp.close() def toPrettyText(self): return self.tokenDisplay.toPrettyText() def bindLoadTextFile(self, fname=None): if fname and os.path.exists(fname): fn=fname else: dir=self.master.config.valueOf("TextDirectory") fn=QFileDialog.getOpenFileName(dir, "Text Files (*.txt);; All Files (*)", self, "Open Text File", "Open Text File", None, 0) if fn.isEmpty(): self.close() return fn=str(fn.local8Bit()) self.master.config.setValue("TextDirectory", dir) self.loadTextFile(fn,new=1) def loadTextFile(self, filename="", new=0): if not(filename): self.bindLoadTextFile() return self.textfile=filename if new: self.unInitAG() self.initAG(filename) try: E,D,R,W=codecs.lookup(self.encoding) fp=open(filename,"r") text=R(fp).read() fp.close() self.fullText=text self.textObject=TextObject(self.fullText) except Exception, e: QMessageBox.warning(self,"File Open Failed", "The following erorr occured " +\ "when opening the text file:\n%s" % e, QMessageBox.Ok + QMessageBox.Default) self.clear() self.unInitAG() if new: self.runTokenizer() def loadObjectsFromAG(self): (self.id2Token, self.token2Id)=createAllTokens(self.AGId,self.textObject) (self.id2Parse, self.parse2Id)=createAllParses(self.AGId,self.id2Token) (self.id2ProblemNote, self.problemNote2Id)=createAllProblemNotes(self.AGId,self.id2Token) for t in self.token2Id: t.loadCurrentParse() def runTokenizer(self): """ Give a warning against removing all existing Token information? Assume that running the tokenizer over text will delete any existing parses. """ if self.parse2Id: ok=QMessageBox.warning(self,"Annoations to be Deleted", "Running the tokenizer over the " +\ "document will delete all previous " +\ "parses.\n\nThis action cannot be undone.%s" % e, QMessageBox.Ok + QMessageBox.Default, QMessageBox.Cancel + QMessageBox.Escape) if ok <> QMessageBox.Ok: return self.clearAllParses() for token in self.token2Id.keys(): id=self.token2Id[token] token.delete() del self.token2Id[token] del self.id2Token[id] selectedTokenizer=self.config.valueOf("Plugins/Tokenizer/SelectedTokenizer") potentialTokenizers=getPlugins(tokenizer.TokenizerPlugin,plugins) selected=[x for x in potentialTokenizers if x.displayName==selectedTokenizer] if selected: self.tokenizer=selected[0] for attr in self.tokenizer.preferences: if self.tokenizer.preferences[attr]=="string": d="" elif self.tokenizer.preferences[attr]=="int": d=0 elif self.tokenizer.preferences[attr]=="list":d="" toget="Plugins/%s/%s" % (self.tokenizer.displayName, attr) self.tokenizer.values[attr]=self.config.valueOf(toget,default=d) self.tokenizer.initialize() self.tokenizer.start() else: self.tokenizer=None tokenList=[] if self.tokenizer: ## Call the tokenizer and create tokens from the results try: offset=0 for p in self.fullText.split("\n\n"): tokenList.append([]) tokenText=self.tokenizer.tokenize(p) for start,end,parsing in tokenText: tokenList[-1].append(Token(AGId=self.AGId, startOffset=start+offset, endOffset=end+offset, textObject=self.textObject,parsing=parsing)) offset += len(p) + 2 except: QMessageBox.warning(self,"Tokenizer failed", "The tokenizer could not be run.\n" +\ "Please check the tokenizer settings and try again.", QMessageBox.Ok + QMessageBox.Default) else: ok=QMessageBox.warning(self,"No tokenizer selected", "No tokenizer plugin has been selected, so " +\ "the entire document will be added to a single " +\ "token.\nTo select a tokenizer, go to the Plugins " +\ "tab of the Preferences dialog.", QMessageBox.Ok + QMessageBox.Default) if not(tokenList): tokenList=[(Token(AGId=self.AGId, startOffset=0, endOffset=len(self.fullText), textObject=self.textObject, parsing=True),)] for p in tokenList: for t in p: self.token2Id[t]=t.annId self.id2Token[t.annId]=t ## Update the display in the TokenText self.tokenDisplay.showTokens(tokenList) def approveUnambiguousParses(self): for t in [x for x in self.token2Id if x.isUnambiguous()]: t.setCurrentParse(t.getParses()[0]) self.tokenDisplay.refreshDisplay() def nextAmbiguity(self): self.goToNext(lambda a:a.isAmbiguous() and not(a.isSelected()), "ambiguous") def previousAmbiguity(self): self.goToPrevious(lambda a:a.isAmbiguous(), "ambiguous") def nextParse(self): self.goToNext(lambda a:a.isSelected(), "selected") def previousParse(self): self.goToPrevious(lambda a:a.isSelected(), "selected") def nextUnparsed(self): self.goToNext(lambda a:a.isUnparsed(), "unparsed") def previousUnparsed(self): self.goToPrevious(lambda a:a.isUnparsed(), "unparsed") def goToNext(self, filter=None, adj="matching", forceBegin=False): if self.currentToken and not(forceBegin): s=self.currentToken.getStartOffset() else: s=0 tokens=[x for x in self.token2Id if filter(x) and x.getStartOffset() > s] if tokens: tokens.sort(lambda a,b:cmp(a.getStartOffset(),b.getStartOffset())) self.selectToken(tokens[0]) else: QMessageBox.information(self,"No more tokens","There are no more %s tokens in the document.." % adj) def goToPrevious(self, filter=None, adj="matching", forceEnd=False): if self.currentToken and not(forceEnd): e=self.currentToken.getStartOffset() else: e=len(self.fullText) tokens=[x for x in self.token2Id if filter(x) and x.getStartOffset() < e] if tokens: tokens.sort(lambda a,b:cmp(a.getStartOffset(),b.getStartOffset())) self.selectToken(tokens[-1]) else: QMessageBox.information(self,"No more tokens","There are no more %s tokens in the document.." % adj) def showStats(self): numTokens=len(self.token2Id) numParsedTokens=len([t for t in self.token2Id if not(t.isUnparsed())]) numUnparsedTokens=numTokens-numParsedTokens numAmbiguousTokens=len([t for t in self.token2Id if t.isAmbiguous()]) numSelectedTokens=len([t for t in self.token2Id if t.isSelected()]) numUnselectedTokens=numParsedTokens-numSelectedTokens msg="Number of tokens in text: %s\n" % numTokens +\ "Number of tokens in text which do/do not have parses: %s/%s\n" % (numParsedTokens,numUnparsedTokens) +\ "Number of tokens in text which parse ambiguously: %s\n" % (numAmbiguousTokens) +\ "Number of tokens in text for which the user has/has not chosen an approved parse: %s/%s\n" % (numSelectedTokens,numUnselectedTokens) QMessageBox.information(self,"Document Statistics",msg) def close(self): if self.tokenizer: try: self.tokenizer.uninitialize() except: QMessageBox.warning(self,"Tokenizer Error\n", "Unable to uninitialize tokenizer.\n " +\ "Check tokenizer settings.", QMessageBox.Ok + QMessageBox.Default) if self.parser: try: self.parser.uninitialize() except: QMessageBox.warning(self,"Parser Error\n", "Unable to uninitialize parser.\n " +\ "Check parser settings.", QMessageBox.Ok + QMessageBox.Default) class TextObject: def __init__(self, text): self.setText(text) def getText(self, start, end): return self.text[start:end] def setText(self,text): self.text=text class NoteTypeDialog(QDialog): def __init__(self, parent): QDialog.__init__(self, parent) self.parent=parent QVBoxLayout(self) self.layout().setAutoAdd(True) self.labelText="The problem note type %s is not" +\ " a part of the current configuration.\n" +\ "What would you like to do with problem notes" +\ " of this type?" self.label=QLabel(self) self.bg=QButtonGroup() self.bg.insert(QRadioButton("Delete", self)) self.bg.insert(QRadioButton("Add to the configuration file", self)) hb=QHBox(self) self.bg.insert(QRadioButton("Change to...", hb)) self.newTypeBox=QComboBox(self) self.newTypeBox.setEnabled(False) self.applyAllButton=QCheckBox("Apply to all notes of this type", self) hb=QHButtonGroup(self) self.connect(QPushButton("OK",hb),SIGNAL("clicked()"),self.accept) self.connect(QPushButton("Cancel",hb),SIGNAL("clicked()"),self.accept) self.connect(self.bg,SIGNAL("clicked(int)"),self.changed) self.connect(self.newTypeBox,SIGNAL("activated(int)"),self.selected) self.connect(self.applyAllButton,SIGNAL("clicked()"),self.applyAll) def applyAll(self): self.parent.noteTypeDialog_applyToAll = self.applyAllButton.isChecked() def selected(self, i): self.parent.noteTypeDialog_newType=str(self.newTypeBox.text(i)) def changed(self, i): if i == 0: self.parent.noteTypeDialog_newType=None self.newTypeBox.setEnabled(False) elif i == 1: self.parent.noteTypeDialog_newType=self.oldType self.newTypeBox.setEnabled(False) else: self.newTypeBox.setEnabled(True) self.parent.noteTypeDialog_newType=str(self.newTypeBox.currentText()) def handleType(self, oldType, allowedTypes): self.parent.noteTypeDialog_applyToAll=False self.parent.noteTypeDialog_newType=0 self.oldType=oldType self.newTypeBox.clear() self.newTypeBox.insertStrList(allowedTypes) self.label.setText(self.labelText % oldType) return self.exec_loop() class mainWindow(QMainWindow): """ Main window for TextMorphoAnalyzer tool """ def __init__(self,settingsFile=None,debug=False, clipboard=None): QMainWindow.__init__(self,None,'Text Morphological Analyzer',Qt.WDestructiveClose) self.settingsFile=settingsFile self.debug=debug self.clipboard=clipboard ## Read in system-stored config file. self.config = tmaConfig(domain="ldc.upenn.edu",product="Beta Text Processing Tool") self.readSettings() self.main = tptMain(self, self, clipboard) self.setCentralWidget(self.main) self.createMenus() self.disableInactiveMenus() def disableInactiveMenus(self): self.actions["editUndo"].setEnabled(False) self.actions["editRedo"].setEnabled(False) self.actions["editCopy"].setEnabled(False) self.actions["editPaste"].setEnabled(False) self.actions["editPastePlain"].setEnabled(False) self.actions["editFind"].setEnabled(False) self.actions["editFindAgain"].setEnabled(False) self.actions["editFindPrevious"].setEnabled(False) def readSettings(self): self.config.readSettings(self.settingsFile) self.settingsFile=None ## For now, just handle UTF-8. Add configurable encodings later? self.Encode={} self.Decode={} self.Reader={} self.Writer={} for e in set(("utf-8",self.config.valueOf("Encoding"))): self.Encode[e],self.Decode[e],self.Reader[e],self.Writer[e]=codecs.lookup(e) def addMenu(self,title,parent=None): if parent: parentMenu=self.menus[parent.replace("&","")] else: parentMenu=self.menuBar() menu=QPopupMenu(self) if parent: self.menus[(parent.replace("&",""),title.replace("&",""))]=menu else: self.menus[title.replace("&","")]=menu parentMenu.insertItem(title,menu) def addAction(self,name,title,icon,label,toDo,shortcut=None,toggle=False,sendTo=None): if icon: icon=QIconSet(QPixmap(icon)) if shortcut <> None: key=Qt.Key_A + (ord(shortcut.upper())-ord("A")) shortcut=Qt.CTRL+key else: shortcut=QKeySequence() if icon: self.actions[name]=QAction(title,icon,label,shortcut,self,"%s Action" % name,toggle) else: self.actions[name]=QAction(title,label,shortcut,self,"%s Action" % name,toggle) if sendTo: self.connect(self.actions[name],SIGNAL("activated()"),sendTo,toDo) else: self.connect(self.actions[name],SIGNAL("activated()"),toDo) def createMenus(self): self.menus={} self.addMenu("&File") self.addMenu("&Edit") self.addMenu("&Token") self.addMenu("&Parses") self.addMenu("T&ools") self.addMenu("&Help") self.createActions() self.addToMenu("File",actionName="fileOpen") self.addToMenu("File") self.addToMenu("File",actionName="fileSave") self.addToMenu("File",actionName="fileSaveAs") self.addToMenu("File") self.addToMenu("File",submenuName="Import...") self.addToMenu("File",submenuName="Import...",actionName="fileImport") self.addToMenu("File",submenuName="Import...",actionName="configImport") self.addToMenu("File",submenuName="Export...") self.addToMenu("File",submenuName="Export...",actionName="fileExport") self.addToMenu("File",submenuName="Export...",actionName="configExport") self.addToMenu("File") self.addToMenu("File",actionName="fileClose") self.addToMenu("File",actionName="fileQuit") self.addToMenu("Edit",actionName="editUndo") self.addToMenu("Edit",actionName="editRedo") self.addToMenu("Edit") self.addToMenu("Edit",actionName="editCopy") self.addToMenu("Edit",actionName="editPaste") self.addToMenu("Edit",actionName="editPastePlain") self.addToMenu("Edit") self.addToMenu("Edit",submenuName="Find...") self.addToMenu("Edit",submenuName="Find...",actionName="editFind") self.addToMenu("Edit",submenuName="Find...",actionName="editFindAgain") self.addToMenu("Edit",submenuName="Find...",actionName="editFindPrevious") self.addToMenu("Edit") self.addToMenu("Edit",actionName="editPreferences") self.addToMenu("Edit",actionName="editFont") self.addToMenu("Token",actionName="tokenModify") self.addToMenu("Token",actionName="tokenMergeNext") self.addToMenu("Token",actionName="tokenMergePrevious") self.addToMenu("Token",actionName="tokenSplit") self.addToMenu("Token",actionName="tokenDelete") self.addToMenu("Parses") self.addToMenu("Parses",actionName="parsesNext") self.addToMenu("Parses",actionName="parsesPrevious") self.addToMenu("Parses") self.addToMenu("Parses",actionName="parsesClearSelection") self.addToMenu("Parses") self.addToMenu("Parses",actionName="parsesClearAll") self.addToMenu("Tools",submenuName="Parse...") self.addToMenu("Tools",submenuName="Parse...",actionName="toolsParseDocument") self.addToMenu("Tools",submenuName="Parse...",actionName="toolsParseSelection") self.addToMenu("Tools") self.addToMenu("Tools",submenuName="Goto...") self.addToMenu("Tools",submenuName="Goto...",actionName="toolsNextAmbiguity") self.addToMenu("Tools",submenuName="Goto...",actionName="toolsPreviousAmbiguity") self.addToMenu("Tools",submenuName="Goto...",actionName="toolsNextUnparsed") self.addToMenu("Tools",submenuName="Goto...",actionName="toolsPreviousUnparsed") self.addToMenu("Tools") self.addToMenu("Tools",actionName="toolsApproveAll") self.addToMenu("Tools") self.addToMenu("Tools",actionName="toolsDocumentStats") self.addToMenu("Help",actionName="helpAbout") def addToMenu(self,menuName,submenuName=None,actionName=None): if menuName and submenuName and not(actionName): self.addMenu(submenuName,menuName) elif actionName: if submenuName: menu=self.menus[(menuName.replace("&",""),submenuName.replace("&",""))] else: menu=self.menus[menuName.replace("&","")] self.actions[actionName].addTo(menu) else: self.menus[menuName].insertSeparator() def createActions(self): self.actions={} self.addAction("fileNew","Open a blank text to edit",icons.filenew,"New",self.fileNew,"N") self.addAction("fileOpen","Open an existing parse",icons.fileopen,"Open...",self.fileOpen,"O") self.addAction("fileSave","Save the text and parses",icons.filesave,"Save...",self.fileSave,"S") self.addAction("fileSaveAs","Save the current file with a new name as a copy",None,"Save As...",self.fileSaveAs) self.addAction("fileImport","Import text from plain text file",None,"Import Text File",self.fileImport) self.addAction("fileExport","Export text to plain text file",None,"Export Text File",self.fileExport) self.addAction("fileClose","Close th current file",None,"Close",self.fileClose) self.addAction("fileQuit","Exit the program",None,"Quit",SLOT('closeAllWindows()'),"Q",sendTo=qApp) self.addAction("configImport","Import an XML configuration file",None,"Import Configuration File",self.configImport) self.addAction("configExport","Export configuration to XML file",None,"Export Configuration File",self.configExport) self.addAction("editUndo","Undo previous action",icons.editundo,"Undo",self.undo,"Z") self.addAction("editRedo","Redo previous action",icons.editredo,"Redo",self.redo) self.addAction("editCopy","Send selected text to the clipboard",icons.editcopy,"Copy",self.copy,"C") self.addAction("editPaste","Paste in selection from the clipboard",icons.editpaste,"Paste",self.paste,"V") self.addAction("editPastePlain","Paste in selection as plain text",None,"Past as Plain Text",self.pastePlain) self.addAction("editFind","Search for text in document",icons.find,"Find in Document",self.find,"F") self.addAction("editFindAgain","Search for next instance of text in document",icons.findagain,"Find Again",self.findAgain,"G") self.addAction("editFindPrevious","Search for previous instance of text in doucment",icons.findprevious,"Find Previous",self.findPrevious) self.addAction("editPreferences","Edit application settings",None,"Preferences",self.editPreferences) self.addAction("editFont","Edit application font",None,"Font",self.editFont) self.addAction("tokenModify","Modify the boundaries for selected token",icons.tokenmodify,"Modify",self.modifyToken) self.addAction("tokenMergeNext","Merge the selected token with the following token",icons.tokenmerge,"Merge Next",self.mergeNext) self.addAction("tokenMergePrevious","Merge the selected token with the preceding token",icons.tokenmerge,"Merge Previous",self.mergePrevious) self.addAction("tokenSplit","Split the selected token in two",icons.tokensplit,"Split",self.splitToken) self.addAction("tokenDelete","Delete the selected token",icons.tokendelete,"Delete",self.deleteToken) self.addAction("parsesNext","Go to the next parse",icons.annnext,"Next",self.nextParse) self.addAction("parsesPrevious","Go to the previous parse",icons.annprevious,"Previous",self.previousParse) self.addAction("parsesClearSelection","Clear all parses on selection",icons.annclearselection,"Clear Selection",self.clearSelectedParses) self.addAction("parsesClearAll","Clear all parses on document",icons.annclearall,"Clear All",self.clearAllParses) self.addAction("toolsParseDocument","Parse the entire document",icons.parsedocument,"Parse Document",self.parseDocument) self.addAction("toolsParseSelection","Parse the selected text",icons.parseselection,"Parse Selection",self.parseSelection) self.addAction("toolsNextAmbiguity","Go to next ambiguous parse",icons.nextambiguity,"Next Ambiguity",self.nextAmbiguity) self.addAction("toolsPreviousAmbiguity","Go to previous ambiguous parse",icons.previousambiguity,"Previous Ambiguity",self.previousAmbiguity) self.addAction("toolsApproveAll","Approve all unambiguous parses",None,"Approve Unambiguous Parses",self.approveUnambiguousParses) self.addAction("toolsNextUnparsed","Go to the next unparsed word",icons.nextunparsed,"Next Unparsed Word",self.nextUnparsed) self.addAction("toolsPreviousUnparsed","Go to the previous unparsed word",icons.previousunparsed,"Previous Unparsed Word",self.previousUnparsed) self.addAction("toolsDocumentStats","Display statistics about the current document",None,"Document Stats",self.showStats) self.addAction("helpAbout","Display application about dialog",None,"About",self.about) def fileNew(self): QMessageBox.information(self,"Not implemented","File New has not been implemented yet.") def fileOpen(self): self.main.bindLoadAnnotationFile() def fileClose(self): self.main.closeFile() def fileSave(self): self.main.saveAG(self.main.annotationFilename) def fileSaveAs(self): self.main.saveAG() def fileImport(self): self.main.bindLoadTextFile() def fileExport(self): self.main.exportTextFile() def configImport(self): fn=QFileDialog.getOpenFileName(os.getcwd(), "All xml Files (*.xml);; All Files (*)", self, "Open Configuration File", "Open Configuration File", None, 0) if fn.isEmpty(): self.close() return fn=str(fn.local8Bit()) self.config.loadXML(fn) def configExport(self): fn=QFileDialog.getSaveFileName(os.getcwd(), "All xml Files (*.xml);; All Files (*)", self, "Save Configuration File", "Save Configuration File", None, 0) if fn.isEmpty(): self.close() return fn=str(fn.local8Bit()) self.config.writeXML(fn) def undo(self): self.main.undo() def redo(self): self.main.redo() def copy(self): self.main.copy() def paste(self): self.main.paste() def pastePlain(self): self.main.pastePlain() def find(self): self.main.find() def findAgain(self): self.main.findAgain() def findPrevious(self): self.main.findPrevious() def editPreferences(self): self.main.editPreferences() def editFont(self): self.main.editFont() def modifyToken(self): self.main.modifyCurrentToken() def mergeNext(self): self.main.mergeNext() def mergePrevious(self): self.main.mergePrevious() def splitToken(self): self.main.splitCurrentToken() def deleteToken(self): self.main.deleteCurrentToken() def nextParse(self): self.main.nextParse() def previousParse(self): self.main.previousParse() def clearSelectedParses(self): self.main.clearCurrentParses() def clearAllParses(self): self.main.clearAllParses() def parseDocument(self): self.main.parseAllTokens() def parseSelection(self): self.main.parseSelectedTokens() def nextAmbiguity(self): self.main.nextAmbiguity() def previousAmbiguity(self): self.main.previousAmbiguity() def approveUnambiguousParses(self): self.main.approveUnambiguousParses() def nextUnparsed(self): self.main.nextUnparsed() def previousUnparsed(self): self.main.previousUnparsed() def showStats(self): self.main.showStats() def about(self): QMessageBox.about(self,"About Text Morpho Analyzer","The 'About' text should go here.") def closeEvent(self,ce): g = self.geometry() self.config.setValue("Geometries/MainWindowX",g.x()) self.config.setValue("Geometries/MainWindowY",g.y()) self.config.setValue("Geometries/MainWindowWidth",g.width()) self.config.setValue("Geometries/MainWindowHeight",g.height()) self.main.close() ce.accept() def main(): def Usage(): print """Usage: %s [options]""" % sys.argv[0] try: opts, args = getopt(sys.argv[1:], 's:d') except: sys.exit(printUsage()) settingsFile=None debug=False for o,a in opts: if o == "-s": settingsFile=a elif o == "-d": print "turn on debug" debug=True else: sys.exit(printUsage()) a = QApplication(sys.argv) #a.setStyle("cde") c = a.clipboard() c.setSelectionMode(1) mw = mainWindow(settingsFile=settingsFile,debug=debug, clipboard=c) mw.show() a.connect(a, SIGNAL('lastWindowClosed()'), a, SLOT('quit()')) a.exec_loop() if __name__=="__main__": main()