aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPo-Chuan Hsieh <sunpoet@FreeBSD.org>2022-03-25 13:34:52 +0000
committerPo-Chuan Hsieh <sunpoet@FreeBSD.org>2022-03-25 13:38:23 +0000
commit0ecfc3c70bbb6579d8a6cf74dd870e4159695c61 (patch)
treecbacfe0126d11e0900c97ce98e078ead1a2243a4
parent28825935b92cb4de657f1051336acd5b6d9d6578 (diff)
downloadports-0ecfc3c70bbb6579d8a6cf74dd870e4159695c61.tar.gz
ports-0ecfc3c70bbb6579d8a6cf74dd870e4159695c61.zip
textproc/py-wordnet: Fix build with setuptools 58.0.0+
With hat: python
-rw-r--r--textproc/py-wordnet/Makefile4
-rw-r--r--textproc/py-wordnet/files/concordance.py128
-rw-r--r--textproc/py-wordnet/files/patch-2to31594
3 files changed, 1725 insertions, 1 deletions
diff --git a/textproc/py-wordnet/Makefile b/textproc/py-wordnet/Makefile
index daa8fc6b94ae..3fa937b7cef0 100644
--- a/textproc/py-wordnet/Makefile
+++ b/textproc/py-wordnet/Makefile
@@ -14,7 +14,7 @@ DISTFILES= pywordnet-${PORTVERSION}.tar.gz:pywn \
MAINTAINER= ports@FreeBSD.org
COMMENT= Python Interface to WordNet
-USES= python:3.6+
+USES= dos2unix python:3.6+
USE_PYTHON= distutils autoplist
WRKSRC= ${WRKDIR}/py${PORTNAME}-${PORTVERSION}
@@ -29,6 +29,8 @@ WN_DICTFILES= adj.exc adv.exc cntlist cntlist.rev data.adj data.adv \
verb.exc
post-patch:
+# concordance.py uses CR line terminators which cannot be fixed by USES=dos2unix
+ @${CP} ${FILESDIR}/concordance.py ${WRKSRC}/concordance.py
@${SED} -e 's|/usr/local/wordnet2.0|${PREFIX}/${WORDNETDATA}|g' \
-e 's|os.path.join(WNHOME.*))|WNHOME)|g' ${WRKSRC}/wordnet.py \
> ${WRKSRC}/wordnet.py.tmp && ${CAT} ${WRKSRC}/wordnet.py.tmp \
diff --git a/textproc/py-wordnet/files/concordance.py b/textproc/py-wordnet/files/concordance.py
new file mode 100644
index 000000000000..89caef0036ab
--- /dev/null
+++ b/textproc/py-wordnet/files/concordance.py
@@ -0,0 +1,128 @@
+# some accessing of the semantic concordance data for wordnet 1.6
+# by Des Berry, berry@ais.it
+
+import string, os
+from wordnet import binarySearchFile
+
+# Sample entries in the 'taglist' file
+# ordinary%1:18:01:: 1 br-a01:78,1;86,1;88,4
+# ordered%5:00:00:organized:01 2 br-j23:6,14;13,32;66,12
+# where the general form is:
+# lemma%ss_type:lex_filenum:lex_id:head_word:head_id sense_number
+# [location_list]
+# location_list: filename:sent_num,word_num[;sent_num,word_num...]
+
+ss_type = ("NOUN", "VERB", "ADJECTIVE", "ADVERB", "ADJECTIVE SATELLITE")
+
+# given a sentence number (and the contents of a semantic concordance file)
+# return a string of words as the sentence
+def find_sentence(snum, msg):
+ str = "<s snum=%s>" % snum
+    s = msg.find(str)
+ if s < 0:
+ return "<Unknown>"
+ s = s + len(str)
+ sentence = ""
+ tag = ""
+ while 1:
+ if msg[s] == '\n':
+ s = s + 1
+            n = msg.find('<', s)
+ if n < 0:
+ break
+ if n - s != 0:
+ if tag == "w" and msg[s] != "'" and len(sentence) > 0: # word form
+ sentence = sentence + " "
+ sentence = sentence + msg[s:n]
+        e = msg.find('>', n)
+ if e < 0:
+ break
+ tag = msg[n+1]
+ if tag == "/": #check for ending sentence
+ if msg[n+2] == 's':
+ #end of sentence
+ break
+ s = e + 1
+ return sentence
+
+# given a taglist sense (one line of the tagfile) and where to find the tagfile (root)
+# return a tuple of
+# symset type ('1' .. '5')
+# sense (numeric character string)
+# list of sentences (constructed from the taglist)
+def tagsentence(tag, root):
+    s = tag.find('%')
+ sentence = []
+ type = tag[s+1]
+ c = s
+ for i in range(0,4):
+        c = tag.find(':', c + 1)
+    c = tag.find(' ', c + 1)
+ sense = tag[c+1]
+ c = c + 3
+ while 1:
+        d = tag.find(' ', c) # file separator
+ if d < 0:
+ loclist = tag[c:]
+ else:
+ loclist = tag[c:d]
+ c = d + 1
+
+        e = loclist.find(':')
+ filename = loclist[:e]
+        fh = open(root + filename, "r")
+ msg = fh.read()
+ fh.close()
+
+ while 1:
+ e = e + 1
+            f = loclist.find(';', e)
+ if f < 0:
+ sent_word = loclist[e:]
+ else:
+ sent_word = loclist[e:f]
+ e = f
+
+            g = sent_word.find(',')
+ sent = sent_word[:g]
+
+ sentence.append(find_sentence(sent, msg))
+
+ if f < 0:
+ break
+
+ if d < 0:
+ break
+ return (type, sense, sentence)
+
+# given a word to search for and where to find the files (root)
+# displays the information
+# This could be changed to display in different ways!
+def sentences(word, root):
+ cache = {}
+    file = open(root + "taglist", "r")
+ key = word + "%"
+ keylen = len(key)
+ binarySearchFile(file, key + " ", cache, 10)
+ print("Word '%s'" % word)
+ while 1:
+ line = file.readline()
+ if line[:keylen] != key:
+ break
+ type, sense, sentence = tagsentence(line, root + "tagfiles/")
+        print(ss_type[int(type) - 1], sense)
+ for sent in sentence:
+ print(sent)
+
+
+def _test(word, corpus, base):
+ print(corpus)
+ sentences("ordinary", base + corpus + "/")
+
+if __name__ == '__main__':
+ base = "C:/win16/dict/semcor/"
+ word = "ordinary"
+ _test(word, "brown1", base)
+ _test(word, "brown2", base)
+ _test(word, "brownv", base)
+
diff --git a/textproc/py-wordnet/files/patch-2to3 b/textproc/py-wordnet/files/patch-2to3
new file mode 100644
index 000000000000..aba2f4cf6d93
--- /dev/null
+++ b/textproc/py-wordnet/files/patch-2to3
@@ -0,0 +1,1594 @@
+--- wntools.py.orig 2004-07-19 05:09:43 UTC
++++ wntools.py
+@@ -33,6 +33,7 @@ __author__ = "Oliver Steele <steele@osteele.com>"
+ __version__ = "2.0"
+
+ from wordnet import *
++from functools import reduce
+
+ #
+ # Domain utilities
+@@ -41,9 +42,9 @@ from wordnet import *
+ def _requireSource(entity):
+ if not hasattr(entity, 'pointers'):
+ if isinstance(entity, Word):
+- raise TypeError, `entity` + " is not a Sense or Synset. Try " + `entity` + "[0] instead."
++ raise TypeError(repr(entity) + " is not a Sense or Synset. Try " + repr(entity) + "[0] instead.")
+ else:
+- raise TypeError, `entity` + " is not a Sense or Synset"
++ raise TypeError(repr(entity) + " is not a Sense or Synset")
+
+ def tree(source, pointerType):
+ """
+@@ -64,9 +65,9 @@ def tree(source, pointerType):
+ >>> #pprint(tree(dog, HYPONYM)) # too verbose to include here
+ """
+ if isinstance(source, Word):
+- return map(lambda s, t=pointerType:tree(s,t), source.getSenses())
++ return list(map(lambda s, t=pointerType:tree(s,t), source.getSenses()))
+ _requireSource(source)
+- return [source] + map(lambda s, t=pointerType:tree(s,t), source.pointerTargets(pointerType))
++ return [source] + list(map(lambda s, t=pointerType:tree(s,t), source.pointerTargets(pointerType)))
+
+ def closure(source, pointerType, accumulator=None):
+ """Return the transitive closure of source under the pointerType
+@@ -78,7 +79,7 @@ def closure(source, pointerType, accumulator=None):
+ ['dog' in {noun: dog, domestic dog, Canis familiaris}, {noun: canine, canid}, {noun: carnivore}, {noun: placental, placental mammal, eutherian, eutherian mammal}, {noun: mammal}, {noun: vertebrate, craniate}, {noun: chordate}, {noun: animal, animate being, beast, brute, creature, fauna}, {noun: organism, being}, {noun: living thing, animate thing}, {noun: object, physical object}, {noun: entity}]
+ """
+ if isinstance(source, Word):
+- return reduce(union, map(lambda s, t=pointerType:tree(s,t), source.getSenses()))
++ return reduce(union, list(map(lambda s, t=pointerType:tree(s,t), source.getSenses())))
+ _requireSource(source)
+ if accumulator is None:
+ accumulator = []
+@@ -193,7 +194,7 @@ def product(u, v):
+ >>> product("123", "abc")
+ [('1', 'a'), ('1', 'b'), ('1', 'c'), ('2', 'a'), ('2', 'b'), ('2', 'c'), ('3', 'a'), ('3', 'b'), ('3', 'c')]
+ """
+- return flatten1(map(lambda a, v=v:map(lambda b, a=a:(a,b), v), u))
++ return flatten1(list(map(lambda a, v=v:list(map(lambda b, a=a:(a,b), v)), u)))
+
+ def removeDuplicates(sequence):
+ """Return a copy of _sequence_ with equal items removed.
+@@ -242,12 +243,12 @@ def getIndex(form, pos='noun'):
+ transformed string until a match is found or all the different
+ strings have been tried. It returns a Word or None."""
+ def trySubstitutions(trySubstitutions, form, substitutions, lookup=1, dictionary=dictionaryFor(pos)):
+- if lookup and dictionary.has_key(form):
++ if lookup and form in dictionary:
+ return dictionary[form]
+ elif substitutions:
+ (old, new) = substitutions[0]
+ substitute = string.replace(form, old, new) and substitute != form
+- if substitute and dictionary.has_key(substitute):
++ if substitute and substitute in dictionary:
+ return dictionary[substitute]
+ return trySubstitutions(trySubstitutions, form, substitutions[1:], lookup=0) or \
+ (substitute and trySubstitutions(trySubstitutions, substitute, substitutions[1:]))
+@@ -313,7 +314,7 @@ def morphy(form, pos='noun', collect=0):
+ exceptions = binarySearchFile(excfile, form)
+ if exceptions:
+ form = exceptions[string.find(exceptions, ' ')+1:-1]
+- if lookup and dictionary.has_key(form):
++ if lookup and form in dictionary:
+ if collect:
+ collection.append(form)
+ else:
+--- wordnet.py.orig 2004-07-19 06:11:31 UTC
++++ wordnet.py
+@@ -53,9 +53,9 @@ WNHOME = environ.get('WNHOME', {
+ 'mac': ":",
+ 'dos': "C:\\wn16",
+ 'nt': "C:\\Program Files\\WordNet\\2.0"}
+- .get(os.name, "/usr/local/wordnet2.0"))
++ .get(os.name, "/usr/local/share/py-wordnet"))
+
+-WNSEARCHDIR = environ.get('WNSEARCHDIR', os.path.join(WNHOME, {'mac': "Database"}.get(os.name, "dict")))
++WNSEARCHDIR = environ.get('WNSEARCHDIR', WNHOME)
+
+ ReadableRepresentations = 1
+ """If true, repr(word), repr(sense), and repr(synset) return
+@@ -210,15 +210,15 @@ class Word:
+
+ def __init__(self, line):
+ """Initialize the word from a line of a WN POS file."""
+- tokens = string.split(line)
+- ints = map(int, tokens[int(tokens[3]) + 4:])
+- self.form = string.replace(tokens[0], '_', ' ')
++ tokens = string.split(line)
++ ints = list(map(int, tokens[int(tokens[3]) + 4:]))
++ self.form = string.replace(tokens[0], '_', ' ')
+ "Orthographic representation of the word."
+- self.pos = _normalizePOS(tokens[1])
++ self.pos = _normalizePOS(tokens[1])
+ "Part of speech. One of NOUN, VERB, ADJECTIVE, ADVERB."
+- self.taggedSenseCount = ints[1]
++ self.taggedSenseCount = ints[1]
+ "Number of senses that are tagged."
+- self._synsetOffsets = ints[2:ints[0]+2]
++ self._synsetOffsets = ints[2:ints[0]+2]
+
+ def getPointers(self, pointerType=None):
+ """Pointers connect senses and synsets, not words.
+@@ -231,18 +231,18 @@ class Word:
+ raise self.getPointers.__doc__
+
+ def getSenses(self):
+- """Return a sequence of senses.
+-
+- >>> N['dog'].getSenses()
+- ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron})
+- """
+- if not hasattr(self, '_senses'):
+- def getSense(offset, pos=self.pos, form=self.form):
+- return getSynset(pos, offset)[form]
+- self._senses = tuple(map(getSense, self._synsetOffsets))
+- del self._synsetOffsets
+- return self._senses
++ """Return a sequence of senses.
+
++ >>> N['dog'].getSenses()
++ ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron})
++ """
++ if not hasattr(self, '_senses'):
++ def getSense(offset, pos=self.pos, form=self.form):
++ return getSynset(pos, offset)[form]
++ self._senses = tuple(map(getSense, self._synsetOffsets))
++ del self._synsetOffsets
++ return self._senses
++
+ # Deprecated. Present for backwards compatability.
+ def senses(self):
+ import wordnet
+@@ -253,70 +253,70 @@ class Word:
+ return self.getSense()
+
+ def isTagged(self):
+- """Return 1 if any sense is tagged.
+-
+- >>> N['dog'].isTagged()
+- 1
+- """
+- return self.taggedSenseCount > 0
++ """Return 1 if any sense is tagged.
++
++ >>> N['dog'].isTagged()
++ 1
++ """
++ return self.taggedSenseCount > 0
+
+ def getAdjectivePositions(self):
+- """Return a sequence of adjective positions that this word can
+- appear in. These are elements of ADJECTIVE_POSITIONS.
+-
+- >>> ADJ['clear'].getAdjectivePositions()
+- [None, 'predicative']
+- """
+- positions = {}
+- for sense in self.getSenses():
+- positions[sense.position] = 1
+- return positions.keys()
++ """Return a sequence of adjective positions that this word can
++ appear in. These are elements of ADJECTIVE_POSITIONS.
++
++ >>> ADJ['clear'].getAdjectivePositions()
++ [None, 'predicative']
++ """
++ positions = {}
++ for sense in self.getSenses():
++ positions[sense.position] = 1
++ return list(positions.keys())
+
+ adjectivePositions = getAdjectivePositions # backwards compatability
+
+ def __cmp__(self, other):
+- """
+- >>> N['cat'] < N['dog']
+- 1
+- >>> N['dog'] < V['dog']
+- 1
+- """
+- return _compareInstances(self, other, ('pos', 'form'))
++ """
++ >>> N['cat'] < N['dog']
++ 1
++ >>> N['dog'] < V['dog']
++ 1
++ """
++ return _compareInstances(self, other, ('pos', 'form'))
+
+ def __str__(self):
+- """Return a human-readable representation.
+-
+- >>> str(N['dog'])
+- 'dog(n.)'
+- """
+- abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'}
+- return self.form + "(" + abbrs[self.pos] + ")"
++ """Return a human-readable representation.
++
++ >>> str(N['dog'])
++ 'dog(n.)'
++ """
++ abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'}
++ return self.form + "(" + abbrs[self.pos] + ")"
+
+ def __repr__(self):
+- """If ReadableRepresentations is true, return a human-readable
+- representation, e.g. 'dog(n.)'.
+-
+- If ReadableRepresentations is false, return a machine-readable
+- representation, e.g. "getWord('dog', 'noun')".
+- """
+- if ReadableRepresentations:
+- return str(self)
+- return "getWord" + `(self.form, self.pos)`
+-
++ """If ReadableRepresentations is true, return a human-readable
++ representation, e.g. 'dog(n.)'.
++
++ If ReadableRepresentations is false, return a machine-readable
++ representation, e.g. "getWord('dog', 'noun')".
++ """
++ if ReadableRepresentations:
++ return str(self)
++ return "getWord" + repr((self.form, self.pos))
++
+ #
+ # Sequence protocol (a Word's elements are its Senses)
+ #
+- def __nonzero__(self):
+- return 1
++ def __bool__(self):
++        return True
+
+ def __len__(self):
+- return len(self.getSenses())
++ return len(self.getSenses())
+
+ def __getitem__(self, index):
+- return self.getSenses()[index]
++ return self.getSenses()[index]
+
+ def __getslice__(self, i, j):
+- return self.getSenses()[i:j]
++ return self.getSenses()[i:j]
+
+
+ class Synset:
+@@ -354,157 +354,157 @@ class Synset:
+
+ def __init__(self, pos, offset, line):
+ "Initialize the synset from a line off a WN synset file."
+- self.pos = pos
++ self.pos = pos
+ "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB."
+- self.offset = offset
++ self.offset = offset
+ """integer offset into the part-of-speech file. Together
+ with pos, this can be used as a unique id."""
+- tokens = string.split(line[:string.index(line, '|')])
+- self.ssType = tokens[2]
+- self.gloss = string.strip(line[string.index(line, '|') + 1:])
++ tokens = string.split(line[:string.index(line, '|')])
++ self.ssType = tokens[2]
++ self.gloss = string.strip(line[string.index(line, '|') + 1:])
+ self.lexname = Lexname.lexnames[int(tokens[1])]
+- (self._senseTuples, remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16))
+- (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0]))
+- if pos == VERB:
+- (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0]))
+- def extractVerbFrames(index, vfTuples):
+- return tuple(map(lambda t:string.atoi(t[1]), filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples)))
+- senseVerbFrames = []
+- for index in range(1, len(self._senseTuples) + 1):
+- senseVerbFrames.append(extractVerbFrames(index, vfTuples))
+- self._senseVerbFrames = senseVerbFrames
+- self.verbFrames = tuple(extractVerbFrames(None, vfTuples))
++ (self._senseTuples, remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16))
++ (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0]))
++ if pos == VERB:
++ (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0]))
++ def extractVerbFrames(index, vfTuples):
++ return tuple([string.atoi(t[1]) for t in list(filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples))])
++ senseVerbFrames = []
++ for index in range(1, len(self._senseTuples) + 1):
++ senseVerbFrames.append(extractVerbFrames(index, vfTuples))
++ self._senseVerbFrames = senseVerbFrames
++ self.verbFrames = tuple(extractVerbFrames(None, vfTuples))
+ """A sequence of integers that index into
+ VERB_FRAME_STRINGS. These list the verb frames that any
+ Sense in this synset participates in. (See also
+ Sense.verbFrames.) Defined only for verbs."""
+
+ def getSenses(self):
+- """Return a sequence of Senses.
+-
+- >>> N['dog'][0].getSenses()
+- ('dog' in {noun: dog, domestic dog, Canis familiaris},)
+- """
+- if not hasattr(self, '_senses'):
+- def loadSense(senseTuple, verbFrames=None, synset=self):
+- return Sense(synset, senseTuple, verbFrames)
+- if self.pos == VERB:
+- self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames))
+- del self._senseVerbFrames
+- else:
+- self._senses = tuple(map(loadSense, self._senseTuples))
+- del self._senseTuples
+- return self._senses
++ """Return a sequence of Senses.
++
++ >>> N['dog'][0].getSenses()
++ ('dog' in {noun: dog, domestic dog, Canis familiaris},)
++ """
++ if not hasattr(self, '_senses'):
++ def loadSense(senseTuple, verbFrames=None, synset=self):
++ return Sense(synset, senseTuple, verbFrames)
++ if self.pos == VERB:
++ self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames))
++ del self._senseVerbFrames
++ else:
++ self._senses = tuple(map(loadSense, self._senseTuples))
++ del self._senseTuples
++ return self._senses
+
+ senses = getSenses
+
+ def getPointers(self, pointerType=None):
+- """Return a sequence of Pointers.
++ """Return a sequence of Pointers.
+
+ If pointerType is specified, only pointers of that type are
+ returned. In this case, pointerType should be an element of
+ POINTER_TYPES.
+-
+- >>> N['dog'][0].getPointers()[:5]
+- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
+- >>> N['dog'][0].getPointers(HYPERNYM)
+- (hypernym -> {noun: canine, canid},)
+- """
+- if not hasattr(self, '_pointers'):
+- def loadPointer(tuple, synset=self):
+- return Pointer(synset.offset, tuple)
+- self._pointers = tuple(map(loadPointer, self._pointerTuples))
+- del self._pointerTuples
+- if pointerType == None:
+- return self._pointers
+- else:
+- _requirePointerType(pointerType)
+- return filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers)
++
++ >>> N['dog'][0].getPointers()[:5]
++ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
++ >>> N['dog'][0].getPointers(HYPERNYM)
++ (hypernym -> {noun: canine, canid},)
++ """
++ if not hasattr(self, '_pointers'):
++ def loadPointer(tuple, synset=self):
++ return Pointer(synset.offset, tuple)
++ self._pointers = tuple(map(loadPointer, self._pointerTuples))
++ del self._pointerTuples
++ if pointerType == None:
++ return self._pointers
++ else:
++ _requirePointerType(pointerType)
++ return list(filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers))
+
+ pointers = getPointers # backwards compatability
+
+ def getPointerTargets(self, pointerType=None):
+- """Return a sequence of Senses or Synsets.
+-
++ """Return a sequence of Senses or Synsets.
++
+ If pointerType is specified, only targets of pointers of that
+ type are returned. In this case, pointerType should be an
+ element of POINTER_TYPES.
+-
+- >>> N['dog'][0].getPointerTargets()[:5]
+- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
+- >>> N['dog'][0].getPointerTargets(HYPERNYM)
+- [{noun: canine, canid}]
+- """
+- return map(Pointer.target, self.getPointers(pointerType))
++
++ >>> N['dog'][0].getPointerTargets()[:5]
++ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
++ >>> N['dog'][0].getPointerTargets(HYPERNYM)
++ [{noun: canine, canid}]
++ """
++ return list(map(Pointer.target, self.getPointers(pointerType)))
+
+ pointerTargets = getPointerTargets # backwards compatability
+
+ def isTagged(self):
+- """Return 1 if any sense is tagged.
+-
+- >>> N['dog'][0].isTagged()
+- 1
+- >>> N['dog'][1].isTagged()
+- 0
+- """
+- return len(filter(Sense.isTagged, self.getSenses())) > 0
++ """Return 1 if any sense is tagged.
++
++ >>> N['dog'][0].isTagged()
++ 1
++ >>> N['dog'][1].isTagged()
++ 0
++ """
++ return len(list(filter(Sense.isTagged, self.getSenses()))) > 0
+
+ def __str__(self):
+- """Return a human-readable representation.
+-
+- >>> str(N['dog'][0].synset)
+- '{noun: dog, domestic dog, Canis familiaris}'
+- """
+- return "{" + self.pos + ": " + string.joinfields(map(lambda sense:sense.form, self.getSenses()), ", ") + "}"
++ """Return a human-readable representation.
++
++ >>> str(N['dog'][0].synset)
++ '{noun: dog, domestic dog, Canis familiaris}'
++ """
++ return "{" + self.pos + ": " + string.joinfields([sense.form for sense in self.getSenses()], ", ") + "}"
+
+ def __repr__(self):
+- """If ReadableRepresentations is true, return a human-readable
+- representation, e.g. 'dog(n.)'.
+-
+- If ReadableRepresentations is false, return a machine-readable
+- representation, e.g. "getSynset(pos, 1234)".
+- """
+- if ReadableRepresentations:
+- return str(self)
+- return "getSynset" + `(self.pos, self.offset)`
++ """If ReadableRepresentations is true, return a human-readable
++ representation, e.g. 'dog(n.)'.
++
++ If ReadableRepresentations is false, return a machine-readable
++ representation, e.g. "getSynset(pos, 1234)".
++ """
++ if ReadableRepresentations:
++ return str(self)
++ return "getSynset" + repr((self.pos, self.offset))
+
+ def __cmp__(self, other):
+- return _compareInstances(self, other, ('pos', 'offset'))
++ return _compareInstances(self, other, ('pos', 'offset'))
+
+ #
+ # Sequence protocol (a Synset's elements are its senses).
+ #
+- def __nonzero__(self):
+- return 1
++ def __bool__(self):
++        return True
+
+ def __len__(self):
+- """
+- >>> len(N['dog'][0].synset)
+- 3
+- """
+- return len(self.getSenses())
++ """
++ >>> len(N['dog'][0].synset)
++ 3
++ """
++ return len(self.getSenses())
+
+ def __getitem__(self, idx):
+- """
+- >>> N['dog'][0].synset[0] == N['dog'][0]
+- 1
+- >>> N['dog'][0].synset['dog'] == N['dog'][0]
+- 1
+- >>> N['dog'][0].synset[N['dog']] == N['dog'][0]
+- 1
+- >>> N['cat'][6]
+- 'cat' in {noun: big cat, cat}
+- """
+- senses = self.getSenses()
+- if isinstance(idx, Word):
+- idx = idx.form
+- if isinstance(idx, StringType):
+- idx = _index(idx, map(lambda sense:sense.form, senses)) or \
+- _index(idx, map(lambda sense:sense.form, senses), _equalsIgnoreCase)
+- return senses[idx]
++ """
++ >>> N['dog'][0].synset[0] == N['dog'][0]
++ 1
++ >>> N['dog'][0].synset['dog'] == N['dog'][0]
++ 1
++ >>> N['dog'][0].synset[N['dog']] == N['dog'][0]
++ 1
++ >>> N['cat'][6]
++ 'cat' in {noun: big cat, cat}
++ """
++ senses = self.getSenses()
++ if isinstance(idx, Word):
++ idx = idx.form
++ if isinstance(idx, StringType):
++ idx = _index(idx, [sense.form for sense in senses]) or \
++ _index(idx, [sense.form for sense in senses], _equalsIgnoreCase)
++ return senses[idx]
+
+ def __getslice__(self, i, j):
+- return self.getSenses()[i:j]
++ return self.getSenses()[i:j]
+
+
+ class Sense:
+@@ -525,7 +525,7 @@ class Sense:
+ VERB_FRAME_STRINGS. These list the verb frames that this
+ Sense partipates in. Defined only for verbs.
+
+- >>> decide = V['decide'][0].synset # first synset for 'decide'
++ >>> decide = V['decide'][0].synset # first synset for 'decide'
+ >>> decide[0].verbFrames
+ (8, 2, 26, 29)
+ >>> decide[1].verbFrames
+@@ -536,124 +536,124 @@ class Sense:
+
+ def __init__(sense, synset, senseTuple, verbFrames=None):
+ "Initialize a sense from a synset's senseTuple."
+- # synset is stored by key (pos, synset) rather than object
+- # reference, to avoid creating a circular reference between
+- # Senses and Synsets that will prevent the vm from
+- # garbage-collecting them.
+- sense.pos = synset.pos
++ # synset is stored by key (pos, synset) rather than object
++ # reference, to avoid creating a circular reference between
++ # Senses and Synsets that will prevent the vm from
++ # garbage-collecting them.
++ sense.pos = synset.pos
+ "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"
+- sense.synsetOffset = synset.offset
++ sense.synsetOffset = synset.offset
+ "synset key. This is used to retrieve the sense."
+- sense.verbFrames = verbFrames
++ sense.verbFrames = verbFrames
+ """A sequence of integers that index into
+ VERB_FRAME_STRINGS. These list the verb frames that this
+ Sense partipates in. Defined only for verbs."""
+- (form, idString) = senseTuple
+- sense.position = None
+- if '(' in form:
+- index = string.index(form, '(')
+- key = form[index + 1:-1]
+- form = form[:index]
+- if key == 'a':
+- sense.position = ATTRIBUTIVE
+- elif key == 'p':
+- sense.position = PREDICATIVE
+- elif key == 'ip':
+- sense.position = IMMEDIATE_POSTNOMINAL
+- else:
+- raise "unknown attribute " + key
+- sense.form = string.replace(form, '_', ' ')
++ (form, idString) = senseTuple
++ sense.position = None
++ if '(' in form:
++ index = string.index(form, '(')
++ key = form[index + 1:-1]
++ form = form[:index]
++ if key == 'a':
++ sense.position = ATTRIBUTIVE
++ elif key == 'p':
++ sense.position = PREDICATIVE
++ elif key == 'ip':
++ sense.position = IMMEDIATE_POSTNOMINAL
++ else:
++                raise ValueError("unknown attribute " + key)
++ sense.form = string.replace(form, '_', ' ')
+ "orthographic representation of the Word this is a Sense of."
+
+ def __getattr__(self, name):
+- # see the note at __init__ about why 'synset' is provided as a
+- # 'virtual' slot
+- if name == 'synset':
+- return getSynset(self.pos, self.synsetOffset)
++ # see the note at __init__ about why 'synset' is provided as a
++ # 'virtual' slot
++ if name == 'synset':
++ return getSynset(self.pos, self.synsetOffset)
+ elif name == 'lexname':
+ return self.synset.lexname
+- else:
+- raise AttributeError, name
++ else:
++ raise AttributeError(name)
+
+ def __str__(self):
+- """Return a human-readable representation.
+-
+- >>> str(N['dog'])
+- 'dog(n.)'
+- """
+- return `self.form` + " in " + str(self.synset)
++ """Return a human-readable representation.
++
++ >>> str(N['dog'])
++ 'dog(n.)'
++ """
++ return repr(self.form) + " in " + str(self.synset)
+
+ def __repr__(self):
+- """If ReadableRepresentations is true, return a human-readable
+- representation, e.g. 'dog(n.)'.
+-
+- If ReadableRepresentations is false, return a machine-readable
+- representation, e.g. "getWord('dog', 'noun')".
+- """
+- if ReadableRepresentations:
+- return str(self)
+- return "%s[%s]" % (`self.synset`, `self.form`)
++ """If ReadableRepresentations is true, return a human-readable
++ representation, e.g. 'dog(n.)'.
++
++ If ReadableRepresentations is false, return a machine-readable
++ representation, e.g. "getWord('dog', 'noun')".
++ """
++ if ReadableRepresentations:
++ return str(self)
++ return "%s[%s]" % (repr(self.synset), repr(self.form))
+
+ def getPointers(self, pointerType=None):
+- """Return a sequence of Pointers.
+-
++ """Return a sequence of Pointers.
++
+ If pointerType is specified, only pointers of that type are
+ returned. In this case, pointerType should be an element of
+ POINTER_TYPES.
+-
+- >>> N['dog'][0].getPointers()[:5]
+- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
+- >>> N['dog'][0].getPointers(HYPERNYM)
+- (hypernym -> {noun: canine, canid},)
+- """
+- senseIndex = _index(self, self.synset.getSenses())
+- def pointsFromThisSense(pointer, selfIndex=senseIndex):
+- return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex
+- return filter(pointsFromThisSense, self.synset.getPointers(pointerType))
++
++ >>> N['dog'][0].getPointers()[:5]
++ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
++ >>> N['dog'][0].getPointers(HYPERNYM)
++ (hypernym -> {noun: canine, canid},)
++ """
++ senseIndex = _index(self, self.synset.getSenses())
++ def pointsFromThisSense(pointer, selfIndex=senseIndex):
++ return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex
++ return list(filter(pointsFromThisSense, self.synset.getPointers(pointerType)))
+
+ pointers = getPointers # backwards compatability
+
+ def getPointerTargets(self, pointerType=None):
+- """Return a sequence of Senses or Synsets.
+-
++ """Return a sequence of Senses or Synsets.
++
+ If pointerType is specified, only targets of pointers of that
+ type are returned. In this case, pointerType should be an
+ element of POINTER_TYPES.
+-
+- >>> N['dog'][0].getPointerTargets()[:5]
+- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
+- >>> N['dog'][0].getPointerTargets(HYPERNYM)
+- [{noun: canine, canid}]
+- """
+- return map(Pointer.target, self.getPointers(pointerType))
++
++ >>> N['dog'][0].getPointerTargets()[:5]
++ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
++ >>> N['dog'][0].getPointerTargets(HYPERNYM)
++ [{noun: canine, canid}]
++ """
++ return list(map(Pointer.target, self.getPointers(pointerType)))
+
+ pointerTargets = getPointerTargets # backwards compatability
+
+ def getSenses(self):
+- return self,
++ return self,
+
+ senses = getSenses # backwards compatability
+
+ def isTagged(self):
+- """Return 1 if any sense is tagged.
+-
+- >>> N['dog'][0].isTagged()
+- 1
+- >>> N['dog'][1].isTagged()
+- 0
+- """
+- word = self.word()
+- return _index(self, word.getSenses()) < word.taggedSenseCount
++ """Return 1 if any sense is tagged.
++
++ >>> N['dog'][0].isTagged()
++ 1
++ >>> N['dog'][1].isTagged()
++ 0
++ """
++ word = self.word()
++ return _index(self, word.getSenses()) < word.taggedSenseCount
+
+ def getWord(self):
+- return getWord(self.form, self.pos)
++ return getWord(self.form, self.pos)
+
+ word = getWord # backwards compatability
+
+ def __cmp__(self, other):
+- def senseIndex(sense, synset=self.synset):
+- return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form)
+- return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other))
++ def senseIndex(sense, synset=self.synset):
++ return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form)
++ return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other))
+
+
+ class Pointer:
+@@ -668,21 +668,21 @@ class Pointer:
+ """
+
+ _POINTER_TYPE_TABLE = {
+- '!': ANTONYM,
++ '!': ANTONYM,
+ '@': HYPERNYM,
+ '~': HYPONYM,
+- '=': ATTRIBUTE,
++ '=': ATTRIBUTE,
+ '^': ALSO_SEE,
+ '*': ENTAILMENT,
+ '>': CAUSE,
+- '$': VERB_GROUP,
+- '#m': MEMBER_MERONYM,
++ '$': VERB_GROUP,
++ '#m': MEMBER_MERONYM,
+ '#s': SUBSTANCE_MERONYM,
+ '#p': PART_MERONYM,
+- '%m': MEMBER_HOLONYM,
++ '%m': MEMBER_HOLONYM,
+ '%s': SUBSTANCE_HOLONYM,
+ '%p': PART_HOLONYM,
+- '&': SIMILAR,
++ '&': SIMILAR,
+ '<': PARTICIPLE_OF,
+ '\\': PERTAINYM,
+ # New in wn 2.0:
+@@ -696,51 +696,51 @@ class Pointer:
+ }
+
+ def __init__(self, sourceOffset, pointerTuple):
+- (type, offset, pos, indices) = pointerTuple
+- self.type = Pointer._POINTER_TYPE_TABLE[type]
++ (type, offset, pos, indices) = pointerTuple
++ self.type = Pointer._POINTER_TYPE_TABLE[type]
+ """One of POINTER_TYPES."""
+- self.sourceOffset = sourceOffset
+- self.targetOffset = int(offset)
+- self.pos = _normalizePOS(pos)
++ self.sourceOffset = sourceOffset
++ self.targetOffset = int(offset)
++ self.pos = _normalizePOS(pos)
+ """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"""
+- indices = string.atoi(indices, 16)
+- self.sourceIndex = indices >> 8
+- self.targetIndex = indices & 255
++        indices = int(indices, 16)
++ self.sourceIndex = indices >> 8
++ self.targetIndex = indices & 255
+
+ def getSource(self):
+- synset = getSynset(self.pos, self.sourceOffset)
+- if self.sourceIndex:
+- return synset[self.sourceIndex - 1]
+- else:
+- return synset
++ synset = getSynset(self.pos, self.sourceOffset)
++ if self.sourceIndex:
++ return synset[self.sourceIndex - 1]
++ else:
++ return synset
+
+ source = getSource # backwards compatability
+
+ def getTarget(self):
+- synset = getSynset(self.pos, self.targetOffset)
+- if self.targetIndex:
+- return synset[self.targetIndex - 1]
+- else:
+- return synset
++ synset = getSynset(self.pos, self.targetOffset)
++ if self.targetIndex:
++ return synset[self.targetIndex - 1]
++ else:
++ return synset
+
+ target = getTarget # backwards compatability
+
+ def __str__(self):
+- return self.type + " -> " + str(self.target())
++ return self.type + " -> " + str(self.target())
+
+ def __repr__(self):
+- if ReadableRepresentations:
+- return str(self)
+- return "<" + str(self) + ">"
++ if ReadableRepresentations:
++ return str(self)
++ return "<" + str(self) + ">"
+
+ def __cmp__(self, other):
+- diff = _compareInstances(self, other, ('pos', 'sourceOffset'))
+- if diff:
+- return diff
+- synset = self.source()
+- def pointerIndex(sense, synset=synset):
+- return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex')))
+- return cmp(pointerIndex(self), pointerIndex(other))
++ diff = _compareInstances(self, other, ('pos', 'sourceOffset'))
++ if diff:
++ return diff
++ synset = self.source()
++ def pointerIndex(sense, synset=synset):
++ return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex')))
++ return cmp(pointerIndex(self), pointerIndex(other))
+
+
+ # Loading the lexnames
+@@ -794,59 +794,59 @@ class Dictionary:
+ """
+
+ def __init__(self, pos, filenameroot):
+- self.pos = pos
++ self.pos = pos
+ """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"""
+- self.indexFile = _IndexFile(pos, filenameroot)
+- self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE)
++ self.indexFile = _IndexFile(pos, filenameroot)
++ self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE)
+
+ def __repr__(self):
+- dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'}
+- if dictionaryVariables.get(self):
+- return self.__module__ + "." + dictionaryVariables[self]
+- return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos)
++ dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'}
++ if dictionaryVariables.get(self):
++ return self.__module__ + "." + dictionaryVariables[self]
++ return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos)
+
+ def getWord(self, form, line=None):
+- key = string.replace(string.lower(form), ' ', '_')
+- pos = self.pos
+- def loader(key=key, line=line, indexFile=self.indexFile):
+- line = line or indexFile.get(key)
+- return line and Word(line)
+- word = _entityCache.get((pos, key), loader)
+- if word:
+- return word
+- else:
+- raise KeyError, "%s is not in the %s database" % (`form`, `pos`)
++        key = form.lower().replace(' ', '_')
++ pos = self.pos
++ def loader(key=key, line=line, indexFile=self.indexFile):
++ line = line or indexFile.get(key)
++ return line and Word(line)
++ word = _entityCache.get((pos, key), loader)
++ if word:
++ return word
++ else:
++ raise KeyError("%s is not in the %s database" % (repr(form), repr(pos)))
+
+ def getSynset(self, offset):
+- pos = self.pos
+- def loader(pos=pos, offset=offset, dataFile=self.dataFile):
+- return Synset(pos, offset, _lineAt(dataFile, offset))
+- return _entityCache.get((pos, offset), loader)
++ pos = self.pos
++ def loader(pos=pos, offset=offset, dataFile=self.dataFile):
++ return Synset(pos, offset, _lineAt(dataFile, offset))
++ return _entityCache.get((pos, offset), loader)
+
+ def _buildIndexCacheFile(self):
+- self.indexFile._buildIndexCacheFile()
++ self.indexFile._buildIndexCacheFile()
+
+ #
+ # Sequence protocol (a Dictionary's items are its Words)
+ #
+- def __nonzero__(self):
+- """Return false. (This is to avoid scanning the whole index file
+- to compute len when a Dictionary is used in test position.)
+-
+- >>> N and 'true'
+- 'true'
+- """
+- return 1
++ def __bool__(self):
++ """Return false. (This is to avoid scanning the whole index file
++ to compute len when a Dictionary is used in test position.)
++
++ >>> N and 'true'
++ 'true'
++ """
++        return True
+
+ def __len__(self):
+- """Return the number of index entries.
+-
+- >>> len(ADJ)
+- 21435
+- """
+- if not hasattr(self, 'length'):
+- self.length = len(self.indexFile)
+- return self.length
++ """Return the number of index entries.
++
++ >>> len(ADJ)
++ 21435
++ """
++ if not hasattr(self, 'length'):
++ self.length = len(self.indexFile)
++ return self.length
+
+ def __getslice__(self, a, b):
+ results = []
+@@ -860,22 +860,22 @@ class Dictionary:
+ return results
+
+ def __getitem__(self, index):
+- """If index is a String, return the Word whose form is
+- index. If index is an integer n, return the Word
+- indexed by the n'th Word in the Index file.
+-
+- >>> N['dog']
+- dog(n.)
+- >>> N[0]
+- 'hood(n.)
+- """
+- if isinstance(index, StringType):
+- return self.getWord(index)
+- elif isinstance(index, IntType):
+- line = self.indexFile[index]
+- return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line)
+- else:
+- raise TypeError, "%s is not a String or Int" % `index`
++ """If index is a String, return the Word whose form is
++ index. If index is an integer n, return the Word
++ indexed by the n'th Word in the Index file.
++
++ >>> N['dog']
++ dog(n.)
++ >>> N[0]
++ 'hood(n.)
++ """
++ if isinstance(index, StringType):
++ return self.getWord(index)
++ elif isinstance(index, IntType):
++ line = self.indexFile[index]
++            return self.getWord(line[:line.find(' ')].replace('_', ' '), line)
++ else:
++ raise TypeError("%s is not a String or Int" % repr(index))
+
+ #
+ # Dictionary protocol
+@@ -884,54 +884,54 @@ class Dictionary:
+ #
+
+ def get(self, key, default=None):
+- """Return the Word whose form is _key_, or _default_.
+-
+- >>> N.get('dog')
+- dog(n.)
+- >>> N.get('inu')
+- """
+- try:
+- return self[key]
+- except LookupError:
+- return default
++ """Return the Word whose form is _key_, or _default_.
++
++ >>> N.get('dog')
++ dog(n.)
++ >>> N.get('inu')
++ """
++ try:
++ return self[key]
++ except LookupError:
++ return default
+
+ def keys(self):
+- """Return a sorted list of strings that index words in this
+- dictionary."""
+- return self.indexFile.keys()
++ """Return a sorted list of strings that index words in this
++ dictionary."""
++ return list(self.indexFile.keys())
+
+ def has_key(self, form):
+- """Return true iff the argument indexes a word in this dictionary.
+-
+- >>> N.has_key('dog')
+- 1
+- >>> N.has_key('inu')
+- 0
+- """
+- return self.indexFile.has_key(form)
++ """Return true iff the argument indexes a word in this dictionary.
++
++ >>> N.has_key('dog')
++ 1
++ >>> N.has_key('inu')
++ 0
++ """
++ return form in self.indexFile
+
+ #
+ # Testing
+ #
+
+ def _testKeys(self):
+- """Verify that index lookup can find each word in the index file."""
+- print "Testing: ", self
+- file = open(self.indexFile.file.name, _FILE_OPEN_MODE)
+- counter = 0
+- while 1:
+- line = file.readline()
+- if line == '': break
+- if line[0] != ' ':
+- key = string.replace(line[:string.find(line, ' ')], '_', ' ')
+- if (counter % 1000) == 0:
+- print "%s..." % (key,),
+- import sys
+- sys.stdout.flush()
+- counter = counter + 1
+- self[key]
+- file.close()
+- print "done."
++ """Verify that index lookup can find each word in the index file."""
++ print("Testing: ", self)
++ file = open(self.indexFile.file.name, _FILE_OPEN_MODE)
++ counter = 0
++ while 1:
++ line = file.readline()
++ if line == '': break
++ if line[0] != ' ':
++                key = line[:line.find(' ')].replace('_', ' ')
++ if (counter % 1000) == 0:
++ print("%s..." % (key,), end=' ')
++ import sys
++ sys.stdout.flush()
++ counter = counter + 1
++ self[key]
++ file.close()
++ print("done.")
+
+
+ class _IndexFile:
+@@ -939,69 +939,69 @@ class _IndexFile:
+ Sequence and Dictionary interface to a sorted index file."""
+
+ def __init__(self, pos, filenameroot):
+- self.pos = pos
+- self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE)
+- self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset)
+- self.rewind()
+- self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx")
+- try:
+- import shelve
+- self.indexCache = shelve.open(self.shelfname, 'r')
+- except:
+- pass
++ self.pos = pos
++ self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE)
++ self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset)
++ self.rewind()
++ self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx")
++ try:
++ import shelve
++ self.indexCache = shelve.open(self.shelfname, 'r')
++ except:
++ pass
+
+ def rewind(self):
+- self.file.seek(0)
+- while 1:
+- offset = self.file.tell()
+- line = self.file.readline()
+- if (line[0] != ' '):
+- break
+- self.nextIndex = 0
+- self.nextOffset = offset
++ self.file.seek(0)
++ while 1:
++ offset = self.file.tell()
++ line = self.file.readline()
++ if (line[0] != ' '):
++ break
++ self.nextIndex = 0
++ self.nextOffset = offset
+
+ #
+ # Sequence protocol (an _IndexFile's items are its lines)
+ #
+- def __nonzero__(self):
+- return 1
++ def __bool__(self):
++        return True
+
+ def __len__(self):
+- if hasattr(self, 'indexCache'):
+- return len(self.indexCache)
+- self.rewind()
+- lines = 0
+- while 1:
+- line = self.file.readline()
+- if line == "":
+- break
+- lines = lines + 1
+- return lines
++ if hasattr(self, 'indexCache'):
++ return len(self.indexCache)
++ self.rewind()
++ lines = 0
++ while 1:
++ line = self.file.readline()
++ if line == "":
++ break
++ lines = lines + 1
++ return lines
+
+- def __nonzero__(self):
+- return 1
++ def __bool__(self):
++        return True
+
+ def __getitem__(self, index):
+- if isinstance(index, StringType):
+- if hasattr(self, 'indexCache'):
+- return self.indexCache[index]
+- return binarySearchFile(self.file, index, self.offsetLineCache, 8)
+- elif isinstance(index, IntType):
+- if hasattr(self, 'indexCache'):
+- return self.get(self.keys[index])
+- if index < self.nextIndex:
+- self.rewind()
+- while self.nextIndex <= index:
+- self.file.seek(self.nextOffset)
+- line = self.file.readline()
+- if line == "":
+- raise IndexError, "index out of range"
+- self.nextIndex = self.nextIndex + 1
+- self.nextOffset = self.file.tell()
+- return line
+- else:
+- raise TypeError, "%s is not a String or Int" % `index`
+-
++ if isinstance(index, StringType):
++ if hasattr(self, 'indexCache'):
++ return self.indexCache[index]
++ return binarySearchFile(self.file, index, self.offsetLineCache, 8)
++ elif isinstance(index, IntType):
++ if hasattr(self, 'indexCache'):
++ return self.get(self.keys[index])
++ if index < self.nextIndex:
++ self.rewind()
++ while self.nextIndex <= index:
++ self.file.seek(self.nextOffset)
++ line = self.file.readline()
++ if line == "":
++ raise IndexError("index out of range")
++ self.nextIndex = self.nextIndex + 1
++ self.nextOffset = self.file.tell()
++ return line
++ else:
++ raise TypeError("%s is not a String or Int" % repr(index))
++
+ #
+ # Dictionary protocol
+ #
+@@ -1009,62 +1009,62 @@ class _IndexFile:
+ #
+
+ def get(self, key, default=None):
+- try:
+- return self[key]
+- except LookupError:
+- return default
++ try:
++ return self[key]
++ except LookupError:
++ return default
+
+ def keys(self):
+- if hasattr(self, 'indexCache'):
+- keys = self.indexCache.keys()
+- keys.sort()
+- return keys
+- else:
+- keys = []
+- self.rewind()
+- while 1:
+- line = self.file.readline()
+- if not line: break
++ if hasattr(self, 'indexCache'):
++ keys = list(self.indexCache.keys())
++ keys.sort()
++ return keys
++ else:
++ keys = []
++ self.rewind()
++ while 1:
++ line = self.file.readline()
++ if not line: break
+ key = line.split(' ', 1)[0]
+- keys.append(key.replace('_', ' '))
+- return keys
++ keys.append(key.replace('_', ' '))
++ return keys
+
+ def has_key(self, key):
+- key = key.replace(' ', '_') # test case: V['haze over']
+- if hasattr(self, 'indexCache'):
+- return self.indexCache.has_key(key)
+- return self.get(key) != None
++ key = key.replace(' ', '_') # test case: V['haze over']
++ if hasattr(self, 'indexCache'):
++ return key in self.indexCache
++ return self.get(key) != None
+
+ #
+ # Index file
+ #
+
+ def _buildIndexCacheFile(self):
+- import shelve
+- import os
+- print "Building %s:" % (self.shelfname,),
+- tempname = self.shelfname + ".temp"
+- try:
+- indexCache = shelve.open(tempname)
+- self.rewind()
+- count = 0
+- while 1:
+- offset, line = self.file.tell(), self.file.readline()
+- if not line: break
+- key = line[:string.find(line, ' ')]
+- if (count % 1000) == 0:
+- print "%s..." % (key,),
+- import sys
+- sys.stdout.flush()
+- indexCache[key] = line
+- count = count + 1
+- indexCache.close()
+- os.rename(tempname, self.shelfname)
+- finally:
+- try: os.remove(tempname)
+- except: pass
+- print "done."
+- self.indexCache = shelve.open(self.shelfname, 'r')
++ import shelve
++ import os
++ print("Building %s:" % (self.shelfname,), end=' ')
++ tempname = self.shelfname + ".temp"
++ try:
++ indexCache = shelve.open(tempname)
++ self.rewind()
++ count = 0
++ while 1:
++ offset, line = self.file.tell(), self.file.readline()
++ if not line: break
++                key = line[:line.find(' ')]
++ if (count % 1000) == 0:
++ print("%s..." % (key,), end=' ')
++ import sys
++ sys.stdout.flush()
++ indexCache[key] = line
++ count = count + 1
++ indexCache.close()
++ os.rename(tempname, self.shelfname)
++ finally:
++ try: os.remove(tempname)
++ except: pass
++ print("done.")
++ self.indexCache = shelve.open(self.shelfname, 'r')
+
+
+ #
+@@ -1091,20 +1091,20 @@ getword, getsense, getsynset = getWord, getSense, getS
+
+ def _requirePointerType(pointerType):
+ if pointerType not in POINTER_TYPES:
+- raise TypeError, `pointerType` + " is not a pointer type"
++ raise TypeError(repr(pointerType) + " is not a pointer type")
+ return pointerType
+
+ def _compareInstances(a, b, fields):
+ """"Return -1, 0, or 1 according to a comparison first by type,
+ then by class, and finally by each of fields.""" # " <- for emacs
+ if not hasattr(b, '__class__'):
+- return cmp(type(a), type(b))
++ return cmp(type(a), type(b))
+ elif a.__class__ != b.__class__:
+- return cmp(a.__class__, b.__class__)
++ return cmp(a.__class__, b.__class__)
+ for field in fields:
+- diff = cmp(getattr(a, field), getattr(b, field))
+- if diff:
+- return diff
++ diff = cmp(getattr(a, field), getattr(b, field))
++ if diff:
++ return diff
+ return 0
+
+ def _equalsIgnoreCase(a, b):
+@@ -1122,14 +1122,14 @@ def _equalsIgnoreCase(a, b):
+ #
+ def _dataFilePathname(filenameroot):
+ if os.name in ('dos', 'nt'):
+- path = os.path.join(WNSEARCHDIR, filenameroot + ".dat")
++ path = os.path.join(WNSEARCHDIR, filenameroot + ".dat")
+ if os.path.exists(path):
+ return path
+ return os.path.join(WNSEARCHDIR, "data." + filenameroot)
+
+ def _indexFilePathname(filenameroot):
+ if os.name in ('dos', 'nt'):
+- path = os.path.join(WNSEARCHDIR, filenameroot + ".idx")
++ path = os.path.join(WNSEARCHDIR, filenameroot + ".idx")
+ if os.path.exists(path):
+ return path
+ return os.path.join(WNSEARCHDIR, "index." + filenameroot)
+@@ -1146,30 +1146,30 @@ def binarySearchFile(file, key, cache={}, cacheDepth=-
+ #if count > 20:
+ # raise "infinite loop"
+ lastState = start, end
+- middle = (start + end) / 2
+- if cache.get(middle):
+- offset, line = cache[middle]
+- else:
+- file.seek(max(0, middle - 1))
+- if middle > 0:
+- file.readline()
+- offset, line = file.tell(), file.readline()
+- if currentDepth < cacheDepth:
+- cache[middle] = (offset, line)
++        middle = (start + end) // 2
++ if cache.get(middle):
++ offset, line = cache[middle]
++ else:
++ file.seek(max(0, middle - 1))
++ if middle > 0:
++ file.readline()
++ offset, line = file.tell(), file.readline()
++ if currentDepth < cacheDepth:
++ cache[middle] = (offset, line)
+ #print start, middle, end, offset, line,
+- if offset > end:
+- assert end != middle - 1, "infinite loop"
+- end = middle - 1
+- elif line[:keylen] == key:# and line[keylen + 1] == ' ':
+- return line
++ if offset > end:
++ assert end != middle - 1, "infinite loop"
++ end = middle - 1
++ elif line[:keylen] == key:# and line[keylen + 1] == ' ':
++ return line
+ #elif offset == end:
+ # return None
+- elif line > key:
+- assert end != middle - 1, "infinite loop"
+- end = middle - 1
+- elif line < key:
+- start = offset + len(line) - 1
+- currentDepth = currentDepth + 1
++ elif line > key:
++ assert end != middle - 1, "infinite loop"
++ end = middle - 1
++ elif line < key:
++ start = offset + len(line) - 1
++ currentDepth = currentDepth + 1
+ thisState = start, end
+ if lastState == thisState:
+ # detects the condition where we're searching past the end
+@@ -1198,12 +1198,12 @@ def _index(key, sequence, testfn=None, keyfn=None):
+ """
+ index = 0
+ for element in sequence:
+- value = element
+- if keyfn:
+- value = keyfn(value)
+- if (not testfn and value == key) or (testfn and testfn(value, key)):
+- return index
+- index = index + 1
++ value = element
++ if keyfn:
++ value = keyfn(value)
++ if (not testfn and value == key) or (testfn and testfn(value, key)):
++ return index
++ index = index + 1
+ return None
+
+ def _partition(sequence, size, count):
+@@ -1216,7 +1216,7 @@ def _partition(sequence, size, count):
+
+ partitions = []
+ for index in range(0, size * count, size):
+- partitions.append(sequence[index:index + size])
++ partitions.append(sequence[index:index + size])
+ return (partitions, sequence[size * count:])
+
+
+@@ -1261,49 +1261,49 @@ class _LRUCache:
+ but the two implementations aren't directly comparable."""
+
+ def __init__(this, capacity):
+- this.capacity = capacity
+- this.clear()
++ this.capacity = capacity
++ this.clear()
+
+ def clear(this):
+- this.values = {}
+- this.history = {}
+- this.oldestTimestamp = 0
+- this.nextTimestamp = 1
++ this.values = {}
++ this.history = {}
++ this.oldestTimestamp = 0
++ this.nextTimestamp = 1
+
+ def removeOldestEntry(this):
+- while this.oldestTimestamp < this.nextTimestamp:
+- if this.history.get(this.oldestTimestamp):
+- key = this.history[this.oldestTimestamp]
+- del this.history[this.oldestTimestamp]
+- del this.values[key]
+- return
+- this.oldestTimestamp = this.oldestTimestamp + 1
++ while this.oldestTimestamp < this.nextTimestamp:
++ if this.history.get(this.oldestTimestamp):
++ key = this.history[this.oldestTimestamp]
++ del this.history[this.oldestTimestamp]
++ del this.values[key]
++ return
++ this.oldestTimestamp = this.oldestTimestamp + 1
+
+ def setCapacity(this, capacity):
+- if capacity == 0:
+- this.clear()
+- else:
+- this.capacity = capacity
+- while len(this.values) > this.capacity:
+- this.removeOldestEntry()
++ if capacity == 0:
++ this.clear()
++ else:
++ this.capacity = capacity
++ while len(this.values) > this.capacity:
++ this.removeOldestEntry()
+
+ def get(this, key, loadfn=None):
+- value = None
+- if this.values:
+- pair = this.values.get(key)
+- if pair:
+- (value, timestamp) = pair
+- del this.history[timestamp]
+- if value == None:
+- value = loadfn and loadfn()
+- if this.values != None:
+- timestamp = this.nextTimestamp
+- this.nextTimestamp = this.nextTimestamp + 1
+- this.values[key] = (value, timestamp)
+- this.history[timestamp] = key
+- if len(this.values) > this.capacity:
+- this.removeOldestEntry()
+- return value
++ value = None
++ if this.values:
++ pair = this.values.get(key)
++ if pair:
++ (value, timestamp) = pair
++ del this.history[timestamp]
++ if value == None:
++ value = loadfn and loadfn()
++ if this.values != None:
++ timestamp = this.nextTimestamp
++ this.nextTimestamp = this.nextTimestamp + 1
++ this.values[key] = (value, timestamp)
++ this.history[timestamp] = key
++ if len(this.values) > this.capacity:
++ this.removeOldestEntry()
++ return value
+
+
+ class _NullCache:
+@@ -1311,10 +1311,10 @@ class _NullCache:
+ LRUCache implements), but doesn't store any values."""
+
+ def clear():
+- pass
++ pass
+
+ def get(this, key, loadfn=None):
+- return loadfn and loadfn()
++ return loadfn and loadfn()
+
+
+ DEFAULT_CACHE_CAPACITY = 1000
+@@ -1327,7 +1327,7 @@ def disableCache():
+ def enableCache():
+ """Enable the entity cache."""
+ if not isinstance(_entityCache, LRUCache):
+- _entityCache = _LRUCache(size)
++ _entityCache = _LRUCache(size)
+
+ def clearCache():
+ """Clear the entity cache."""
+@@ -1365,36 +1365,36 @@ def _initializePOSTables():
+ _POSNormalizationTable = {}
+ _POStoDictionaryTable = {}
+ for pos, abbreviations in (
+- (NOUN, "noun n n."),
+- (VERB, "verb v v."),
+- (ADJECTIVE, "adjective adj adj. a s"),
+- (ADVERB, "adverb adv adv. r")):
+- tokens = string.split(abbreviations)
+- for token in tokens:
+- _POSNormalizationTable[token] = pos
+- _POSNormalizationTable[string.upper(token)] = pos
++ (NOUN, "noun n n."),
++ (VERB, "verb v v."),
++ (ADJECTIVE, "adjective adj adj. a s"),
++ (ADVERB, "adverb adv adv. r")):
++        tokens = abbreviations.split()
++ for token in tokens:
++ _POSNormalizationTable[token] = pos
++            _POSNormalizationTable[token.upper()] = pos
+ for dict in Dictionaries:
+- _POSNormalizationTable[dict] = dict.pos
+- _POStoDictionaryTable[dict.pos] = dict
++ _POSNormalizationTable[dict] = dict.pos
++ _POStoDictionaryTable[dict.pos] = dict
+
+ _initializePOSTables()
+
+ def _normalizePOS(pos):
+ norm = _POSNormalizationTable.get(pos)
+ if norm:
+- return norm
+- raise TypeError, `pos` + " is not a part of speech type"
++ return norm
++ raise TypeError(repr(pos) + " is not a part of speech type")
+
+ def _dictionaryFor(pos):
+ pos = _normalizePOS(pos)
+ dict = _POStoDictionaryTable.get(pos)
+ if dict == None:
+- raise RuntimeError, "The " + `pos` + " dictionary has not been created"
++ raise RuntimeError("The " + repr(pos) + " dictionary has not been created")
+ return dict
+
+ def buildIndexFiles():
+ for dict in Dictionaries:
+- dict._buildIndexCacheFile()
++ dict._buildIndexCacheFile()
+
+
+ #
+@@ -1404,7 +1404,7 @@ def buildIndexFiles():
+ def _testKeys():
+ #This is slow, so don't do it as part of the normal test procedure.
+ for dictionary in Dictionaries:
+- dictionary._testKeys()
++ dictionary._testKeys()
+
+ def _test(reset=0):
+ import doctest, wordnet