Darssin/mythes-en: mythes-en.python3.patch (53.26 KB)
Committed by Darssin on 2023-09-07 16:34: project init
Repository: https://gitee.com/darssin/mythes-en.git
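
This patch ports the wn2ooo converter shipped with mythes-en 3.0 (wn2ooo.py and its bundled wordnet.py module) from Python 2 to Python 3. Assuming GNU patch and the unpacked mythes-en-3.0 source tree (paths as in the diff headers below), it applies with `patch -p1 -d mythes-en-3.0 < mythes-en.python3.patch`.
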
diff -ru mythes-en-3.0/wn2ooo/wn2ooo.py mythes-en-3.0.fixed/wn2ooo/wn2ooo.py
--- mythes-en-3.0/wn2ooo/wn2ooo.py 2005-07-23 23:21:20.000000000 +0100
+++ mythes-en-3.0.fixed/wn2ooo/wn2ooo.py 2018-07-30 14:46:52.695201526 +0100
@@ -65,7 +65,7 @@
entry = getRelatedTerms(u, HYPERNYM, '')
try:
listpos = l.index(entry)
- except ValueError, e:
+ except ValueError as e:
l.append(entry)
return str.join("|", l)
@@ -74,12 +74,12 @@
for word in words:
l = []
if c % 100 == 0:
- print >> sys.stderr, "Working on word %d" % c
+ print("Working on word %d" % c, file=sys.stderr)
for pos in [ADJ, N, V, ADV]:
try:
for s in pos[word].getSenses():
l.append(s)
- except KeyError, e:
+ except KeyError as e:
#print >> sys.stderr, e
continue
syn_count = 0
@@ -118,7 +118,7 @@
syn_lines = "%s%s" % (syn_lines, more_generic_terms)
syn_count = syn_count + 1
if syn_count > 0:
- print "%s|%d\n%s" % (word, syn_count, syn_lines)
+ print("%s|%d\n%s" % (word, syn_count, syn_lines))
c = c + 1
return
@@ -132,40 +132,38 @@
return s
def main():
- print "ISO8859-1"
+ print("ISO8859-1")
words = {}
dic = Dictionary(ADJECTIVE, "adj")
- for w in dic.keys():
+ for w in list(dic.keys()):
words[w] = None
dic = Dictionary(NOUN, "noun")
- for w in dic.keys():
+ for w in list(dic.keys()):
words[w] = None
dic = Dictionary(VERB, "verb")
- for w in dic.keys():
+ for w in list(dic.keys()):
words[w] = None
dic = Dictionary(ADVERB, "adv")
- for w in dic.keys():
+ for w in list(dic.keys()):
words[w] = None
- words = words.keys()
+ words = list(words.keys())
# tests:
#words = ['dog', 'house', 'nullipara']
#words = ['absent', 'whistle stop']
#words = ['war']
- print >>sys.stderr, "Dictionaries contain %d words" % len(words)
- print >>sys.stderr, "Sorting..."
- words.sort(mycompare)
+ print("Dictionaries contain %d words" % len(words), file=sys.stderr)
+ print("Sorting...", file=sys.stderr)
+ words = sorted(words, key=mycompare)
printSynsForWords(words)
return
-def mycompare(a, b):
+def mycompare(elem):
# stupid hack to make sure the list is sorted like Kevin's original list:
- a = a.replace(" ", "Z")
- b = b.replace(" ", "Z")
- return cmp(a, b)
+ return elem.replace(" ", "Z")
main()
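
The `mycompare` rewrite above turns the old two-argument comparator into a sort key, since Python 3's `sorted()` accepts only `key=` and the builtin `cmp()` is gone. The same ordering could also be kept by wrapping the original comparator with `functools.cmp_to_key`; a minimal sketch of the equivalence, with `(a > b) - (a < b)` standing in for the removed `cmp(a, b)`:

    import functools

    def mycompare_py2(a, b):
        # the original Python 2 comparator, with cmp(a, b) spelled out
        a = a.replace(" ", "Z")
        b = b.replace(" ", "Z")
        return (a > b) - (a < b)

    words = ["whistle stop", "whistler", "war"]
    assert sorted(words, key=lambda w: w.replace(" ", "Z")) == \
           sorted(words, key=functools.cmp_to_key(mycompare_py2))
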
diff -ru mythes-en-3.0/wn2ooo/wordnet.py mythes-en-3.0.fixed/wn2ooo/wordnet.py
--- mythes-en-3.0/wn2ooo/wordnet.py 2005-07-23 23:21:16.000000000 +0100
+++ mythes-en-3.0.fixed/wn2ooo/wordnet.py 2018-07-30 14:46:52.695201526 +0100
@@ -44,7 +44,6 @@
import string
import os
from os import environ
-from types import IntType, ListType, StringType, TupleType
#
@@ -212,15 +211,15 @@
def __init__(self, line):
"""Initialize the word from a line of a WN POS file."""
- tokens = string.split(line)
- ints = map(int, tokens[int(tokens[3]) + 4:])
- self.form = string.replace(tokens[0], '_', ' ')
+ tokens = line.split()
+ ints = list(map(int, tokens[int(tokens[3]) + 4:]))
+ self.form = tokens[0].replace('_', ' ')
"Orthographic representation of the word."
- self.pos = _normalizePOS(tokens[1])
+ self.pos = _normalizePOS(tokens[1])
"Part of speech. One of NOUN, VERB, ADJECTIVE, ADVERB."
- self.taggedSenseCount = ints[1]
+ self.taggedSenseCount = ints[1]
"Number of senses that are tagged."
- self._synsetOffsets = ints[2:ints[0]+2]
+ self._synsetOffsets = ints[2:ints[0]+2]
def getPointers(self, pointerType=None):
"""Pointers connect senses and synsets, not words.
@@ -233,17 +232,17 @@
raise self.getPointers.__doc__
def getSenses(self):
- """Return a sequence of senses.
-
- >>> N['dog'].getSenses()
- ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron})
- """
- if not hasattr(self, '_senses'):
- def getSense(offset, pos=self.pos, form=self.form):
- return getSynset(pos, offset)[form]
- self._senses = tuple(map(getSense, self._synsetOffsets))
- del self._synsetOffsets
- return self._senses
+ """Return a sequence of senses.
+
+ >>> N['dog'].getSenses()
+ ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron})
+ """
+ if not hasattr(self, '_senses'):
+ def getSense(offset, pos=self.pos, form=self.form):
+ return getSynset(pos, offset)[form]
+ self._senses = tuple(map(getSense, self._synsetOffsets))
+ del self._synsetOffsets
+ return self._senses
# Deprecated. Present for backwards compatability.
def senses(self):
@@ -255,70 +254,70 @@
return self.getSense()
def isTagged(self):
- """Return 1 if any sense is tagged.
-
- >>> N['dog'].isTagged()
- 1
- """
- return self.taggedSenseCount > 0
+ """Return 1 if any sense is tagged.
+
+ >>> N['dog'].isTagged()
+ 1
+ """
+ return self.taggedSenseCount > 0
def getAdjectivePositions(self):
- """Return a sequence of adjective positions that this word can
- appear in. These are elements of ADJECTIVE_POSITIONS.
-
- >>> ADJ['clear'].getAdjectivePositions()
- [None, 'predicative']
- """
- positions = {}
- for sense in self.getSenses():
- positions[sense.position] = 1
- return positions.keys()
+ """Return a sequence of adjective positions that this word can
+ appear in. These are elements of ADJECTIVE_POSITIONS.
+
+ >>> ADJ['clear'].getAdjectivePositions()
+ [None, 'predicative']
+ """
+ positions = {}
+ for sense in self.getSenses():
+ positions[sense.position] = 1
+ return list(positions.keys())
adjectivePositions = getAdjectivePositions # backwards compatability
def __cmp__(self, other):
- """
- >>> N['cat'] < N['dog']
- 1
- >>> N['dog'] < V['dog']
- 1
- """
- return _compareInstances(self, other, ('pos', 'form'))
+ """
+ >>> N['cat'] < N['dog']
+ 1
+ >>> N['dog'] < V['dog']
+ 1
+ """
+ return _compareInstances(self, other, ('pos', 'form'))
def __str__(self):
- """Return a human-readable representation.
-
- >>> str(N['dog'])
- 'dog(n.)'
- """
- abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'}
- return self.form + "(" + abbrs[self.pos] + ")"
+ """Return a human-readable representation.
+
+ >>> str(N['dog'])
+ 'dog(n.)'
+ """
+ abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'}
+ return self.form + "(" + abbrs[self.pos] + ")"
def __repr__(self):
- """If ReadableRepresentations is true, return a human-readable
- representation, e.g. 'dog(n.)'.
-
- If ReadableRepresentations is false, return a machine-readable
- representation, e.g. "getWord('dog', 'noun')".
- """
- if ReadableRepresentations:
- return str(self)
- return "getWord" + `(self.form, self.pos)`
-
+ """If ReadableRepresentations is true, return a human-readable
+ representation, e.g. 'dog(n.)'.
+
+ If ReadableRepresentations is false, return a machine-readable
+ representation, e.g. "getWord('dog', 'noun')".
+ """
+ if ReadableRepresentations:
+ return str(self)
+ return "getWord" + repr((self.form, self.pos))
+
#
# Sequence protocol (a Word's elements are its Senses)
#
- def __nonzero__(self):
- return 1
+ def __bool__(self):
+ return True
def __len__(self):
- return len(self.getSenses())
+ return len(self.getSenses())
def __getitem__(self, index):
- return self.getSenses()[index]
+ return self.getSenses()[index]
def __getslice__(self, i, j):
- return self.getSenses()[i:j]
+ return self.getSenses()[i:j]
class Synset:
@@ -356,157 +355,157 @@
def __init__(self, pos, offset, line):
"Initialize the synset from a line off a WN synset file."
- self.pos = pos
+ self.pos = pos
"part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB."
- self.offset = offset
+ self.offset = offset
"""integer offset into the part-of-speech file. Together
with pos, this can be used as a unique id."""
- tokens = string.split(line[:string.index(line, '|')])
- self.ssType = tokens[2]
- self.gloss = string.strip(line[string.index(line, '|') + 1:])
+ tokens = line[:line.index('|')].split()
+ self.ssType = tokens[2]
+ self.gloss = line[line.index('|') + 1:].strip()
self.lexname = Lexname.lexnames[int(tokens[1])]
- (self._senseTuples, remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16))
- (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0]))
- if pos == VERB:
- (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0]))
- def extractVerbFrames(index, vfTuples):
- return tuple(map(lambda t:string.atoi(t[1]), filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples)))
- senseVerbFrames = []
- for index in range(1, len(self._senseTuples) + 1):
- senseVerbFrames.append(extractVerbFrames(index, vfTuples))
- self._senseVerbFrames = senseVerbFrames
- self.verbFrames = tuple(extractVerbFrames(None, vfTuples))
+ (self._senseTuples, remainder) = _partition(tokens[4:], 2, int(tokens[3], 16))
+ (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0]))
+ if pos == VERB:
+ (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0]))
+ def extractVerbFrames(index, vfTuples):
+ return tuple([int(t[1]) for t in list(filter(lambda t,i=index:int(t[2],16) in (0, i), vfTuples))])
+ senseVerbFrames = []
+ for index in range(1, len(self._senseTuples) + 1):
+ senseVerbFrames.append(extractVerbFrames(index, vfTuples))
+ self._senseVerbFrames = senseVerbFrames
+ self.verbFrames = tuple(extractVerbFrames(None, vfTuples))
"""A sequence of integers that index into
VERB_FRAME_STRINGS. These list the verb frames that any
Sense in this synset participates in. (See also
Sense.verbFrames.) Defined only for verbs."""
def getSenses(self):
- """Return a sequence of Senses.
-
- >>> N['dog'][0].getSenses()
- ('dog' in {noun: dog, domestic dog, Canis familiaris},)
- """
- if not hasattr(self, '_senses'):
- def loadSense(senseTuple, verbFrames=None, synset=self):
- return Sense(synset, senseTuple, verbFrames)
- if self.pos == VERB:
- self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames))
- del self._senseVerbFrames
- else:
- self._senses = tuple(map(loadSense, self._senseTuples))
- del self._senseTuples
- return self._senses
+ """Return a sequence of Senses.
+
+ >>> N['dog'][0].getSenses()
+ ('dog' in {noun: dog, domestic dog, Canis familiaris},)
+ """
+ if not hasattr(self, '_senses'):
+ def loadSense(senseTuple, verbFrames=None, synset=self):
+ return Sense(synset, senseTuple, verbFrames)
+ if self.pos == VERB:
+ self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames))
+ del self._senseVerbFrames
+ else:
+ self._senses = tuple(map(loadSense, self._senseTuples))
+ del self._senseTuples
+ return self._senses
senses = getSenses
def getPointers(self, pointerType=None):
- """Return a sequence of Pointers.
+ """Return a sequence of Pointers.
If pointerType is specified, only pointers of that type are
returned. In this case, pointerType should be an element of
POINTER_TYPES.
-
- >>> N['dog'][0].getPointers()[:5]
- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
- >>> N['dog'][0].getPointers(HYPERNYM)
- (hypernym -> {noun: canine, canid},)
- """
- if not hasattr(self, '_pointers'):
- def loadPointer(tuple, synset=self):
- return Pointer(synset.offset, tuple)
- self._pointers = tuple(map(loadPointer, self._pointerTuples))
- del self._pointerTuples
- if pointerType == None:
- return self._pointers
- else:
- _requirePointerType(pointerType)
- return filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers)
+
+ >>> N['dog'][0].getPointers()[:5]
+ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
+ >>> N['dog'][0].getPointers(HYPERNYM)
+ (hypernym -> {noun: canine, canid},)
+ """
+ if not hasattr(self, '_pointers'):
+ def loadPointer(tuple, synset=self):
+ return Pointer(synset.offset, tuple)
+ self._pointers = tuple(map(loadPointer, self._pointerTuples))
+ del self._pointerTuples
+ if pointerType == None:
+ return self._pointers
+ else:
+ _requirePointerType(pointerType)
+ return list(filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers))
pointers = getPointers # backwards compatability
def getPointerTargets(self, pointerType=None):
- """Return a sequence of Senses or Synsets.
-
+ """Return a sequence of Senses or Synsets.
+
If pointerType is specified, only targets of pointers of that
type are returned. In this case, pointerType should be an
element of POINTER_TYPES.
-
- >>> N['dog'][0].getPointerTargets()[:5]
- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
- >>> N['dog'][0].getPointerTargets(HYPERNYM)
- [{noun: canine, canid}]
- """
- return map(Pointer.target, self.getPointers(pointerType))
+
+ >>> N['dog'][0].getPointerTargets()[:5]
+ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
+ >>> N['dog'][0].getPointerTargets(HYPERNYM)
+ [{noun: canine, canid}]
+ """
+ return list(map(Pointer.target, self.getPointers(pointerType)))
pointerTargets = getPointerTargets # backwards compatability
def isTagged(self):
- """Return 1 if any sense is tagged.
-
- >>> N['dog'][0].isTagged()
- 1
- >>> N['dog'][1].isTagged()
- 0
- """
- return len(filter(Sense.isTagged, self.getSenses())) > 0
+ """Return 1 if any sense is tagged.
+
+ >>> N['dog'][0].isTagged()
+ 1
+ >>> N['dog'][1].isTagged()
+ 0
+ """
+ return len(list(filter(Sense.isTagged, self.getSenses()))) > 0
def __str__(self):
- """Return a human-readable representation.
-
- >>> str(N['dog'][0].synset)
- '{noun: dog, domestic dog, Canis familiaris}'
- """
- return "{" + self.pos + ": " + string.joinfields(map(lambda sense:sense.form, self.getSenses()), ", ") + "}"
+ """Return a human-readable representation.
+
+ >>> str(N['dog'][0].synset)
+ '{noun: dog, domestic dog, Canis familiaris}'
+ """
+ return "{" + self.pos + ": " + string.joinfields([sense.form for sense in self.getSenses()], ", ") + "}"
def __repr__(self):
- """If ReadableRepresentations is true, return a human-readable
- representation, e.g. 'dog(n.)'.
-
- If ReadableRepresentations is false, return a machine-readable
- representation, e.g. "getSynset(pos, 1234)".
- """
- if ReadableRepresentations:
- return str(self)
- return "getSynset" + `(self.pos, self.offset)`
+ """If ReadableRepresentations is true, return a human-readable
+ representation, e.g. 'dog(n.)'.
+
+ If ReadableRepresentations is false, return a machine-readable
+ representation, e.g. "getSynset(pos, 1234)".
+ """
+ if ReadableRepresentations:
+ return str(self)
+ return "getSynset" + repr((self.pos, self.offset))
def __cmp__(self, other):
- return _compareInstances(self, other, ('pos', 'offset'))
+ return _compareInstances(self, other, ('pos', 'offset'))
#
# Sequence protocol (a Synset's elements are its senses).
#
- def __nonzero__(self):
- return 1
+ def __bool__(self):
+ return True
def __len__(self):
- """
- >>> len(N['dog'][0].synset)
- 3
- """
- return len(self.getSenses())
+ """
+ >>> len(N['dog'][0].synset)
+ 3
+ """
+ return len(self.getSenses())
def __getitem__(self, idx):
- """
- >>> N['dog'][0].synset[0] == N['dog'][0]
- 1
- >>> N['dog'][0].synset['dog'] == N['dog'][0]
- 1
- >>> N['dog'][0].synset[N['dog']] == N['dog'][0]
- 1
- >>> N['cat'][6]
- 'cat' in {noun: big cat, cat}
- """
- senses = self.getSenses()
- if isinstance(idx, Word):
- idx = idx.form
- if isinstance(idx, StringType):
- idx = _index(idx, map(lambda sense:sense.form, senses)) or \
- _index(idx, map(lambda sense:sense.form, senses), _equalsIgnoreCase)
- return senses[idx]
+ """
+ >>> N['dog'][0].synset[0] == N['dog'][0]
+ 1
+ >>> N['dog'][0].synset['dog'] == N['dog'][0]
+ 1
+ >>> N['dog'][0].synset[N['dog']] == N['dog'][0]
+ 1
+ >>> N['cat'][6]
+ 'cat' in {noun: big cat, cat}
+ """
+ senses = self.getSenses()
+ if isinstance(idx, Word):
+ idx = idx.form
+ if isinstance(idx, str):
+ idx = _index(idx, [sense.form for sense in senses]) or \
+ _index(idx, [sense.form for sense in senses], _equalsIgnoreCase)
+ return senses[idx]
def __getslice__(self, i, j):
- return self.getSenses()[i:j]
+ return self.getSenses()[i:j]
class Sense:
@@ -527,7 +526,7 @@
VERB_FRAME_STRINGS. These list the verb frames that this
Sense partipates in. Defined only for verbs.
- >>> decide = V['decide'][0].synset # first synset for 'decide'
+ >>> decide = V['decide'][0].synset # first synset for 'decide'
>>> decide[0].verbFrames
(8, 2, 26, 29)
>>> decide[1].verbFrames
@@ -538,124 +537,124 @@
def __init__(sense, synset, senseTuple, verbFrames=None):
"Initialize a sense from a synset's senseTuple."
- # synset is stored by key (pos, synset) rather than object
- # reference, to avoid creating a circular reference between
- # Senses and Synsets that will prevent the vm from
- # garbage-collecting them.
- sense.pos = synset.pos
+ # synset is stored by key (pos, synset) rather than object
+ # reference, to avoid creating a circular reference between
+ # Senses and Synsets that will prevent the vm from
+ # garbage-collecting them.
+ sense.pos = synset.pos
"part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"
- sense.synsetOffset = synset.offset
+ sense.synsetOffset = synset.offset
"synset key. This is used to retrieve the sense."
- sense.verbFrames = verbFrames
+ sense.verbFrames = verbFrames
"""A sequence of integers that index into
VERB_FRAME_STRINGS. These list the verb frames that this
Sense partipates in. Defined only for verbs."""
- (form, idString) = senseTuple
- sense.position = None
- if '(' in form:
- index = string.index(form, '(')
- key = form[index + 1:-1]
- form = form[:index]
- if key == 'a':
- sense.position = ATTRIBUTIVE
- elif key == 'p':
- sense.position = PREDICATIVE
- elif key == 'ip':
- sense.position = IMMEDIATE_POSTNOMINAL
- else:
- raise "unknown attribute " + key
- sense.form = string.replace(form, '_', ' ')
+ (form, idString) = senseTuple
+ sense.position = None
+ if '(' in form:
+ index = form.index('(')
+ key = form[index + 1:-1]
+ form = form[:index]
+ if key == 'a':
+ sense.position = ATTRIBUTIVE
+ elif key == 'p':
+ sense.position = PREDICATIVE
+ elif key == 'ip':
+ sense.position = IMMEDIATE_POSTNOMINAL
+ else:
+ raise "unknown attribute " + key
+ sense.form = form.replace('_', ' ')
"orthographic representation of the Word this is a Sense of."
def __getattr__(self, name):
- # see the note at __init__ about why 'synset' is provided as a
- # 'virtual' slot
- if name == 'synset':
- return getSynset(self.pos, self.synsetOffset)
+ # see the note at __init__ about why 'synset' is provided as a
+ # 'virtual' slot
+ if name == 'synset':
+ return getSynset(self.pos, self.synsetOffset)
elif name == 'lexname':
return self.synset.lexname
- else:
- raise AttributeError, name
+ else:
+ raise AttributeError(name)
def __str__(self):
- """Return a human-readable representation.
-
- >>> str(N['dog'])
- 'dog(n.)'
- """
- return `self.form` + " in " + str(self.synset)
+ """Return a human-readable representation.
+
+ >>> str(N['dog'])
+ 'dog(n.)'
+ """
+ return repr(self.form) + " in " + str(self.synset)
def __repr__(self):
- """If ReadableRepresentations is true, return a human-readable
- representation, e.g. 'dog(n.)'.
-
- If ReadableRepresentations is false, return a machine-readable
- representation, e.g. "getWord('dog', 'noun')".
- """
- if ReadableRepresentations:
- return str(self)
- return "%s[%s]" % (`self.synset`, `self.form`)
+ """If ReadableRepresentations is true, return a human-readable
+ representation, e.g. 'dog(n.)'.
+
+ If ReadableRepresentations is false, return a machine-readable
+ representation, e.g. "getWord('dog', 'noun')".
+ """
+ if ReadableRepresentations:
+ return str(self)
+ return "%s[%s]" % (repr(self.synset), repr(self.form))
def getPointers(self, pointerType=None):
- """Return a sequence of Pointers.
-
+ """Return a sequence of Pointers.
+
If pointerType is specified, only pointers of that type are
returned. In this case, pointerType should be an element of
POINTER_TYPES.
-
- >>> N['dog'][0].getPointers()[:5]
- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
- >>> N['dog'][0].getPointers(HYPERNYM)
- (hypernym -> {noun: canine, canid},)
- """
- senseIndex = _index(self, self.synset.getSenses())
- def pointsFromThisSense(pointer, selfIndex=senseIndex):
- return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex
- return filter(pointsFromThisSense, self.synset.getPointers(pointerType))
+
+ >>> N['dog'][0].getPointers()[:5]
+ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
+ >>> N['dog'][0].getPointers(HYPERNYM)
+ (hypernym -> {noun: canine, canid},)
+ """
+ senseIndex = _index(self, self.synset.getSenses())
+ def pointsFromThisSense(pointer, selfIndex=senseIndex):
+ return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex
+ return list(filter(pointsFromThisSense, self.synset.getPointers(pointerType)))
pointers = getPointers # backwards compatability
def getPointerTargets(self, pointerType=None):
- """Return a sequence of Senses or Synsets.
-
+ """Return a sequence of Senses or Synsets.
+
If pointerType is specified, only targets of pointers of that
type are returned. In this case, pointerType should be an
element of POINTER_TYPES.
-
- >>> N['dog'][0].getPointerTargets()[:5]
- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
- >>> N['dog'][0].getPointerTargets(HYPERNYM)
- [{noun: canine, canid}]
- """
- return map(Pointer.target, self.getPointers(pointerType))
+
+ >>> N['dog'][0].getPointerTargets()[:5]
+ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
+ >>> N['dog'][0].getPointerTargets(HYPERNYM)
+ [{noun: canine, canid}]
+ """
+ return list(map(Pointer.target, self.getPointers(pointerType)))
pointerTargets = getPointerTargets # backwards compatability
def getSenses(self):
- return self,
+ return self,
senses = getSenses # backwards compatability
def isTagged(self):
- """Return 1 if any sense is tagged.
-
- >>> N['dog'][0].isTagged()
- 1
- >>> N['dog'][1].isTagged()
- 0
- """
- word = self.word()
- return _index(self, word.getSenses()) < word.taggedSenseCount
+ """Return 1 if any sense is tagged.
+
+ >>> N['dog'][0].isTagged()
+ 1
+ >>> N['dog'][1].isTagged()
+ 0
+ """
+ word = self.word()
+ return _index(self, word.getSenses()) < word.taggedSenseCount
def getWord(self):
- return getWord(self.form, self.pos)
+ return getWord(self.form, self.pos)
word = getWord # backwards compatability
def __cmp__(self, other):
- def senseIndex(sense, synset=self.synset):
- return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form)
- return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other))
+ def senseIndex(sense, synset=self.synset):
+ return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form)
+ return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other))
class Pointer:
@@ -670,21 +669,21 @@
"""
_POINTER_TYPE_TABLE = {
- '!': ANTONYM,
+ '!': ANTONYM,
'@': HYPERNYM,
'~': HYPONYM,
- '=': ATTRIBUTE,
+ '=': ATTRIBUTE,
'^': ALSO_SEE,
'*': ENTAILMENT,
'>': CAUSE,
- '$': VERB_GROUP,
- '#m': MEMBER_MERONYM,
+ '$': VERB_GROUP,
+ '#m': MEMBER_MERONYM,
'#s': SUBSTANCE_MERONYM,
'#p': PART_MERONYM,
- '%m': MEMBER_HOLONYM,
+ '%m': MEMBER_HOLONYM,
'%s': SUBSTANCE_HOLONYM,
'%p': PART_HOLONYM,
- '&': SIMILAR,
+ '&': SIMILAR,
'<': PARTICIPLE_OF,
'\\': PERTAINYM,
# New in wn 2.0:
@@ -698,57 +697,57 @@
}
def __init__(self, sourceOffset, pointerTuple):
- (type, offset, pos, indices) = pointerTuple
- # dnaber: try to adapt to WordNet 2.1:
- if type == "@i":
- type = "@"
- if type == "~i":
- type = "~"
- # /dnaber
- self.type = Pointer._POINTER_TYPE_TABLE[type]
+ (type, offset, pos, indices) = pointerTuple
+ # dnaber: try to adapt to WordNet 2.1:
+ if type == "@i":
+ type = "@"
+ if type == "~i":
+ type = "~"
+ # /dnaber
+ self.type = Pointer._POINTER_TYPE_TABLE[type]
"""One of POINTER_TYPES."""
- self.sourceOffset = sourceOffset
- self.targetOffset = int(offset)
- self.pos = _normalizePOS(pos)
+ self.sourceOffset = sourceOffset
+ self.targetOffset = int(offset)
+ self.pos = _normalizePOS(pos)
"""part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"""
- indices = string.atoi(indices, 16)
- self.sourceIndex = indices >> 8
- self.targetIndex = indices & 255
+ indices = int(indices, 16)
+ self.sourceIndex = indices >> 8
+ self.targetIndex = indices & 255
def getSource(self):
- synset = getSynset(self.pos, self.sourceOffset)
- if self.sourceIndex:
- return synset[self.sourceIndex - 1]
- else:
- return synset
+ synset = getSynset(self.pos, self.sourceOffset)
+ if self.sourceIndex:
+ return synset[self.sourceIndex - 1]
+ else:
+ return synset
source = getSource # backwards compatability
def getTarget(self):
- synset = getSynset(self.pos, self.targetOffset)
- if self.targetIndex:
- return synset[self.targetIndex - 1]
- else:
- return synset
+ synset = getSynset(self.pos, self.targetOffset)
+ if self.targetIndex:
+ return synset[self.targetIndex - 1]
+ else:
+ return synset
target = getTarget # backwards compatability
def __str__(self):
- return self.type + " -> " + str(self.target())
+ return self.type + " -> " + str(self.target())
def __repr__(self):
- if ReadableRepresentations:
- return str(self)
- return "<" + str(self) + ">"
+ if ReadableRepresentations:
+ return str(self)
+ return "<" + str(self) + ">"
def __cmp__(self, other):
- diff = _compareInstances(self, other, ('pos', 'sourceOffset'))
- if diff:
- return diff
- synset = self.source()
- def pointerIndex(sense, synset=synset):
- return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex')))
- return cmp(pointerIndex(self), pointerIndex(other))
+ diff = _compareInstances(self, other, ('pos', 'sourceOffset'))
+ if diff:
+ return diff
+ synset = self.source()
+ def pointerIndex(sense, synset=synset):
+ return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex')))
+ return cmp(pointerIndex(self), pointerIndex(other))
# Loading the lexnames
@@ -769,7 +768,7 @@
def setupLexnames():
for l in open(WNSEARCHDIR+'/lexnames').readlines():
- i,name,category = string.split(l)
+ i,name,category = l.split()
Lexname(name,PartsOfSpeech[int(category)-1])
setupLexnames()
@@ -802,59 +801,59 @@
"""
def __init__(self, pos, filenameroot):
- self.pos = pos
+ self.pos = pos
"""part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"""
- self.indexFile = _IndexFile(pos, filenameroot)
- self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE)
+ self.indexFile = _IndexFile(pos, filenameroot)
+ self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE)
def __repr__(self):
- dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'}
- if dictionaryVariables.get(self):
- return self.__module__ + "." + dictionaryVariables[self]
- return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos)
+ dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'}
+ if dictionaryVariables.get(self):
+ return self.__module__ + "." + dictionaryVariables[self]
+ return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos)
def getWord(self, form, line=None):
- key = string.replace(string.lower(form), ' ', '_')
- pos = self.pos
- def loader(key=key, line=line, indexFile=self.indexFile):
- line = line or indexFile.get(key)
- return line and Word(line)
- word = _entityCache.get((pos, key), loader)
- if word:
- return word
- else:
- raise KeyError, "%s is not in the %s database" % (`form`, `pos`)
+ key = form.lower().replace(' ', '_')
+ pos = self.pos
+ def loader(key=key, line=line, indexFile=self.indexFile):
+ line = line or indexFile.get(key)
+ return line and Word(line)
+ word = _entityCache.get((pos, key), loader)
+ if word != None:
+ return word
+ else:
+ raise KeyError("%s is not in the %s database" % (repr(form), repr(pos)))
def getSynset(self, offset):
- pos = self.pos
- def loader(pos=pos, offset=offset, dataFile=self.dataFile):
- return Synset(pos, offset, _lineAt(dataFile, offset))
- return _entityCache.get((pos, offset), loader)
+ pos = self.pos
+ def loader(pos=pos, offset=offset, dataFile=self.dataFile):
+ return Synset(pos, offset, _lineAt(dataFile, offset))
+ return _entityCache.get((pos, offset), loader)
def _buildIndexCacheFile(self):
- self.indexFile._buildIndexCacheFile()
+ self.indexFile._buildIndexCacheFile()
#
# Sequence protocol (a Dictionary's items are its Words)
#
- def __nonzero__(self):
- """Return false. (This is to avoid scanning the whole index file
- to compute len when a Dictionary is used in test position.)
-
- >>> N and 'true'
- 'true'
- """
- return 1
+ def __bool__(self):
+ """Return false. (This is to avoid scanning the whole index file
+ to compute len when a Dictionary is used in test position.)
+
+ >>> N and 'true'
+ 'true'
+ """
+ return 1
def __len__(self):
- """Return the number of index entries.
-
- >>> len(ADJ)
- 21435
- """
- if not hasattr(self, 'length'):
- self.length = len(self.indexFile)
- return self.length
+ """Return the number of index entries.
+
+ >>> len(ADJ)
+ 21435
+ """
+ if not hasattr(self, 'length'):
+ self.length = len(self.indexFile)
+ return self.length
def __getslice__(self, a, b):
results = []
@@ -868,22 +867,22 @@
return results
def __getitem__(self, index):
- """If index is a String, return the Word whose form is
- index. If index is an integer n, return the Word
- indexed by the n'th Word in the Index file.
-
- >>> N['dog']
- dog(n.)
- >>> N[0]
- 'hood(n.)
- """
- if isinstance(index, StringType):
- return self.getWord(index)
- elif isinstance(index, IntType):
- line = self.indexFile[index]
- return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line)
- else:
- raise TypeError, "%s is not a String or Int" % `index`
+ """If index is a String, return the Word whose form is
+ index. If index is an integer n, return the Word
+ indexed by the n'th Word in the Index file.
+
+ >>> N['dog']
+ dog(n.)
+ >>> N[0]
+ 'hood(n.)
+ """
+ if isinstance(index, str):
+ return self.getWord(index)
+ elif isinstance(index, int):
+ line = self.indexFile[index]
+ return self.getWord(line[:line.find(' ')].replace('_', ' '), line)
+ else:
+ raise TypeError("%s is not a String or Int" % repr(index))
#
# Dictionary protocol
@@ -892,54 +891,54 @@
#
def get(self, key, default=None):
- """Return the Word whose form is _key_, or _default_.
-
- >>> N.get('dog')
- dog(n.)
- >>> N.get('inu')
- """
- try:
- return self[key]
- except LookupError:
- return default
+ """Return the Word whose form is _key_, or _default_.
+
+ >>> N.get('dog')
+ dog(n.)
+ >>> N.get('inu')
+ """
+ try:
+ return self[key]
+ except LookupError:
+ return default
def keys(self):
- """Return a sorted list of strings that index words in this
- dictionary."""
- return self.indexFile.keys()
+ """Return a sorted list of strings that index words in this
+ dictionary."""
+ return list(self.indexFile.keys())
def has_key(self, form):
- """Return true iff the argument indexes a word in this dictionary.
-
- >>> N.has_key('dog')
- 1
- >>> N.has_key('inu')
- 0
- """
- return self.indexFile.has_key(form)
+ """Return true iff the argument indexes a word in this dictionary.
+
+ >>> N.has_key('dog')
+ 1
+ >>> N.has_key('inu')
+ 0
+ """
+ return self.indexFile.has_key(form)
#
# Testing
#
def _testKeys(self):
- """Verify that index lookup can find each word in the index file."""
- print "Testing: ", self
- file = open(self.indexFile.file.name, _FILE_OPEN_MODE)
- counter = 0
- while 1:
- line = file.readline()
- if line == '': break
- if line[0] != ' ':
- key = string.replace(line[:string.find(line, ' ')], '_', ' ')
- if (counter % 1000) == 0:
- print "%s..." % (key,),
- import sys
- sys.stdout.flush()
- counter = counter + 1
- self[key]
- file.close()
- print "done."
+ """Verify that index lookup can find each word in the index file."""
+ print("Testing: ", self)
+ file = open(self.indexFile.file.name, _FILE_OPEN_MODE)
+ counter = 0
+ while 1:
+ line = file.readline()
+ if line == '': break
+ if line[0] != ' ':
+ key = line[:line.find(' ')].replace('_', ' ')
+ if (counter % 1000) == 0:
+ print("%s..." % (key,), end=' ')
+ import sys
+ sys.stdout.flush()
+ counter = counter + 1
+ self[key]
+ file.close()
+ print("done.")
class _IndexFile:
@@ -947,69 +946,69 @@
Sequence and Dictionary interface to a sorted index file."""
def __init__(self, pos, filenameroot):
- self.pos = pos
- self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE)
- self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset)
- self.rewind()
- self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx")
- try:
- import shelve
- self.indexCache = shelve.open(self.shelfname, 'r')
- except:
- pass
+ self.pos = pos
+ self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE)
+ self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset)
+ self.rewind()
+ self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx")
+ try:
+ import shelve
+ self.indexCache = shelve.open(self.shelfname, 'r')
+ except:
+ pass
def rewind(self):
- self.file.seek(0)
- while 1:
- offset = self.file.tell()
- line = self.file.readline()
- if (line[0] != ' '):
- break
- self.nextIndex = 0
- self.nextOffset = offset
+ self.file.seek(0)
+ while 1:
+ offset = self.file.tell()
+ line = self.file.readline()
+ if (line[0] != ' '):
+ break
+ self.nextIndex = 0
+ self.nextOffset = offset
#
# Sequence protocol (an _IndexFile's items are its lines)
#
- def __nonzero__(self):
- return 1
+ def __bool__(self):
+ return True
def __len__(self):
- if hasattr(self, 'indexCache'):
- return len(self.indexCache)
- self.rewind()
- lines = 0
- while 1:
- line = self.file.readline()
- if line == "":
- break
- lines = lines + 1
- return lines
+ if hasattr(self, 'indexCache'):
+ return len(self.indexCache)
+ self.rewind()
+ lines = 0
+ while 1:
+ line = self.file.readline()
+ if line == "":
+ break
+ lines = lines + 1
+ return lines
- def __nonzero__(self):
- return 1
+ def __bool__(self):
+ return True
def __getitem__(self, index):
- if isinstance(index, StringType):
- if hasattr(self, 'indexCache'):
- return self.indexCache[index]
- return binarySearchFile(self.file, index, self.offsetLineCache, 8)
- elif isinstance(index, IntType):
- if hasattr(self, 'indexCache'):
- return self.get(self.keys[index])
- if index < self.nextIndex:
- self.rewind()
- while self.nextIndex <= index:
- self.file.seek(self.nextOffset)
- line = self.file.readline()
- if line == "":
- raise IndexError, "index out of range"
- self.nextIndex = self.nextIndex + 1
- self.nextOffset = self.file.tell()
- return line
- else:
- raise TypeError, "%s is not a String or Int" % `index`
-
+ if isinstance(index, str):
+ if hasattr(self, 'indexCache'):
+ return self.indexCache[index]
+ return binarySearchFile(self.file, index, self.offsetLineCache, 8)
+ elif isinstance(index, int):
+ if hasattr(self, 'indexCache'):
+ return self.get(self.keys()[index])
+ if index < self.nextIndex:
+ self.rewind()
+ while self.nextIndex <= index:
+ self.file.seek(self.nextOffset)
+ line = self.file.readline()
+ if line == "":
+ raise IndexError("index out of range")
+ self.nextIndex = self.nextIndex + 1
+ self.nextOffset = self.file.tell()
+ return line
+ else:
+ raise TypeError("%s is not a String or Int" % repr(index))
+
#
# Dictionary protocol
#
@@ -1017,62 +1016,62 @@
#
def get(self, key, default=None):
- try:
- return self[key]
- except LookupError:
- return default
+ try:
+ return self[key]
+ except LookupError:
+ return default
def keys(self):
- if hasattr(self, 'indexCache'):
- keys = self.indexCache.keys()
- keys.sort()
- return keys
- else:
- keys = []
- self.rewind()
- while 1:
- line = self.file.readline()
- if not line: break
+ if hasattr(self, 'indexCache'):
+ keys = list(self.indexCache.keys())
+ keys.sort()
+ return keys
+ else:
+ keys = []
+ self.rewind()
+ while 1:
+ line = self.file.readline()
+ if not line: break
key = line.split(' ', 1)[0]
- keys.append(key.replace('_', ' '))
- return keys
+ keys.append(key.replace('_', ' '))
+ return keys
def has_key(self, key):
- key = key.replace(' ', '_') # test case: V['haze over']
- if hasattr(self, 'indexCache'):
- return self.indexCache.has_key(key)
- return self.get(key) != None
+ key = key.replace(' ', '_') # test case: V['haze over']
+ if hasattr(self, 'indexCache'):
+ return key in self.indexCache
+ return self.get(key) != None
#
# Index file
#
def _buildIndexCacheFile(self):
- import shelve
- import os
- print "Building %s:" % (self.shelfname,),
- tempname = self.shelfname + ".temp"
- try:
- indexCache = shelve.open(tempname)
- self.rewind()
- count = 0
- while 1:
- offset, line = self.file.tell(), self.file.readline()
- if not line: break
- key = line[:string.find(line, ' ')]
- if (count % 1000) == 0:
- print "%s..." % (key,),
- import sys
- sys.stdout.flush()
- indexCache[key] = line
- count = count + 1
- indexCache.close()
- os.rename(tempname, self.shelfname)
- finally:
- try: os.remove(tempname)
- except: pass
- print "done."
- self.indexCache = shelve.open(self.shelfname, 'r')
+ import shelve
+ import os
+ print("Building %s:" % (self.shelfname,), end=' ')
+ tempname = self.shelfname + ".temp"
+ try:
+ indexCache = shelve.open(tempname)
+ self.rewind()
+ count = 0
+ while 1:
+ offset, line = self.file.tell(), self.file.readline()
+ if not line: break
+ key = line[:line.find(' ')]
+ if (count % 1000) == 0:
+ print("%s..." % (key,), end=' ')
+ import sys
+ sys.stdout.flush()
+ indexCache[key] = line
+ count = count + 1
+ indexCache.close()
+ os.rename(tempname, self.shelfname)
+ finally:
+ try: os.remove(tempname)
+ except: pass
+ print("done.")
+ self.indexCache = shelve.open(self.shelfname, 'r')
#
@@ -1099,20 +1098,20 @@
def _requirePointerType(pointerType):
if pointerType not in POINTER_TYPES:
- raise TypeError, `pointerType` + " is not a pointer type"
+ raise TypeError(repr(pointerType) + " is not a pointer type")
return pointerType
def _compareInstances(a, b, fields):
""""Return -1, 0, or 1 according to a comparison first by type,
then by class, and finally by each of fields.""" # " <- for emacs
if not hasattr(b, '__class__'):
- return cmp(type(a), type(b))
+ return cmp(type(a), type(b))
elif a.__class__ != b.__class__:
- return cmp(a.__class__, b.__class__)
+ return cmp(a.__class__, b.__class__)
for field in fields:
- diff = cmp(getattr(a, field), getattr(b, field))
- if diff:
- return diff
+ diff = cmp(getattr(a, field), getattr(b, field))
+ if diff:
+ return diff
return 0
def _equalsIgnoreCase(a, b):
@@ -1123,21 +1122,21 @@
>>> _equalsIgnoreCase('dOg', 'DOG')
1
"""
- return a == b or string.lower(a) == string.lower(b)
+ return a == b or a.lower() == b.lower()
#
# File utilities
#
def _dataFilePathname(filenameroot):
if os.name in ('dos', 'nt'):
- path = os.path.join(WNSEARCHDIR, filenameroot + ".dat")
+ path = os.path.join(WNSEARCHDIR, filenameroot + ".dat")
if os.path.exists(path):
return path
return os.path.join(WNSEARCHDIR, "data." + filenameroot)
def _indexFilePathname(filenameroot):
if os.name in ('dos', 'nt'):
- path = os.path.join(WNSEARCHDIR, filenameroot + ".idx")
+ path = os.path.join(WNSEARCHDIR, filenameroot + ".idx")
if os.path.exists(path):
return path
return os.path.join(WNSEARCHDIR, "index." + filenameroot)
@@ -1154,30 +1153,30 @@
#if count > 20:
# raise "infinite loop"
lastState = start, end
- middle = (start + end) / 2
- if cache.get(middle):
- offset, line = cache[middle]
- else:
- file.seek(max(0, middle - 1))
- if middle > 0:
- file.readline()
- offset, line = file.tell(), file.readline()
- if currentDepth < cacheDepth:
- cache[middle] = (offset, line)
+ middle = (start + end) // 2
+ if cache.get(middle):
+ offset, line = cache[middle]
+ else:
+ file.seek(max(0, middle - 1))
+ if middle > 0:
+ file.readline()
+ offset, line = file.tell(), file.readline()
+ if currentDepth < cacheDepth:
+ cache[middle] = (offset, line)
#print start, middle, end, offset, line,
- if offset > end:
- assert end != middle - 1, "infinite loop"
- end = middle - 1
- elif line[:keylen] == key:# and line[keylen + 1] == ' ':
- return line
+ if offset > end:
+ assert end != middle - 1, "infinite loop"
+ end = middle - 1
+ elif line[:keylen] == key:# and line[keylen + 1] == ' ':
+ return line
#elif offset == end:
# return None
- elif line > key:
- assert end != middle - 1, "infinite loop"
- end = middle - 1
- elif line < key:
- start = offset + len(line) - 1
- currentDepth = currentDepth + 1
+ elif line > key:
+ assert end != middle - 1, "infinite loop"
+ end = middle - 1
+ elif line < key:
+ start = offset + len(line) - 1
+ currentDepth = currentDepth + 1
thisState = start, end
if lastState == thisState:
# detects the condition where we're searching past the end
@@ -1206,12 +1205,12 @@
"""
index = 0
for element in sequence:
- value = element
- if keyfn:
- value = keyfn(value)
- if (not testfn and value == key) or (testfn and testfn(value, key)):
- return index
- index = index + 1
+ value = element
+ if keyfn:
+ value = keyfn(value)
+ if (not testfn and value == key) or (testfn and testfn(value, key)):
+ return index
+ index = index + 1
return None
def _partition(sequence, size, count):
@@ -1224,7 +1223,7 @@
partitions = []
for index in range(0, size * count, size):
- partitions.append(sequence[index:index + size])
+ partitions.append(sequence[index:index + size])
return (partitions, sequence[size * count:])
@@ -1269,49 +1268,49 @@
but the two implementations aren't directly comparable."""
def __init__(this, capacity):
- this.capacity = capacity
- this.clear()
+ this.capacity = capacity
+ this.clear()
def clear(this):
- this.values = {}
- this.history = {}
- this.oldestTimestamp = 0
- this.nextTimestamp = 1
+ this.values = {}
+ this.history = {}
+ this.oldestTimestamp = 0
+ this.nextTimestamp = 1
def removeOldestEntry(this):
- while this.oldestTimestamp < this.nextTimestamp:
- if this.history.get(this.oldestTimestamp):
- key = this.history[this.oldestTimestamp]
- del this.history[this.oldestTimestamp]
- del this.values[key]
- return
- this.oldestTimestamp = this.oldestTimestamp + 1
+ while this.oldestTimestamp < this.nextTimestamp:
+ if this.history.get(this.oldestTimestamp):
+ key = this.history[this.oldestTimestamp]
+ del this.history[this.oldestTimestamp]
+ del this.values[key]
+ return
+ this.oldestTimestamp = this.oldestTimestamp + 1
def setCapacity(this, capacity):
- if capacity == 0:
- this.clear()
- else:
- this.capacity = capacity
- while len(this.values) > this.capacity:
- this.removeOldestEntry()
+ if capacity == 0:
+ this.clear()
+ else:
+ this.capacity = capacity
+ while len(this.values) > this.capacity:
+ this.removeOldestEntry()
def get(this, key, loadfn=None):
- value = None
- if this.values:
- pair = this.values.get(key)
- if pair:
- (value, timestamp) = pair
- del this.history[timestamp]
- if value == None:
- value = loadfn and loadfn()
- if this.values != None:
- timestamp = this.nextTimestamp
- this.nextTimestamp = this.nextTimestamp + 1
- this.values[key] = (value, timestamp)
- this.history[timestamp] = key
- if len(this.values) > this.capacity:
- this.removeOldestEntry()
- return value
+ value = None
+ if this.values:
+ pair = this.values.get(key)
+ if pair:
+ (value, timestamp) = pair
+ del this.history[timestamp]
+ if value == None:
+ value = loadfn and loadfn()
+ if this.values != None:
+ timestamp = this.nextTimestamp
+ this.nextTimestamp = this.nextTimestamp + 1
+ this.values[key] = (value, timestamp)
+ this.history[timestamp] = key
+ if len(this.values) > this.capacity:
+ this.removeOldestEntry()
+ return value
class _NullCache:
@@ -1319,10 +1318,10 @@
LRUCache implements), but doesn't store any values."""
def clear():
- pass
+ pass
def get(this, key, loadfn=None):
- return loadfn and loadfn()
+ return loadfn and loadfn()
DEFAULT_CACHE_CAPACITY = 1000
@@ -1335,7 +1334,7 @@
def enableCache():
"""Enable the entity cache."""
if not isinstance(_entityCache, LRUCache):
- _entityCache = _LRUCache(size)
+ _entityCache = _LRUCache(size)
def clearCache():
"""Clear the entity cache."""
@@ -1373,36 +1372,36 @@
_POSNormalizationTable = {}
_POStoDictionaryTable = {}
for pos, abbreviations in (
- (NOUN, "noun n n."),
- (VERB, "verb v v."),
- (ADJECTIVE, "adjective adj adj. a s"),
- (ADVERB, "adverb adv adv. r")):
- tokens = string.split(abbreviations)
- for token in tokens:
- _POSNormalizationTable[token] = pos
- _POSNormalizationTable[string.upper(token)] = pos
+ (NOUN, "noun n n."),
+ (VERB, "verb v v."),
+ (ADJECTIVE, "adjective adj adj. a s"),
+ (ADVERB, "adverb adv adv. r")):
+ tokens = abbreviations.split()
+ for token in tokens:
+ _POSNormalizationTable[token] = pos
+ _POSNormalizationTable[token.upper()] = pos
for dict in Dictionaries:
- _POSNormalizationTable[dict] = dict.pos
- _POStoDictionaryTable[dict.pos] = dict
+ _POSNormalizationTable[dict] = dict.pos
+ _POStoDictionaryTable[dict.pos] = dict
_initializePOSTables()
def _normalizePOS(pos):
norm = _POSNormalizationTable.get(pos)
if norm:
- return norm
- raise TypeError, `pos` + " is not a part of speech type"
+ return norm
+ raise TypeError(repr(pos) + " is not a part of speech type")
def _dictionaryFor(pos):
pos = _normalizePOS(pos)
dict = _POStoDictionaryTable.get(pos)
if dict == None:
- raise RuntimeError, "The " + `pos` + " dictionary has not been created"
+ raise RuntimeError("The " + repr(pos) + " dictionary has not been created")
return dict
def buildIndexFiles():
for dict in Dictionaries:
- dict._buildIndexCacheFile()
+ dict._buildIndexCacheFile()
#
@@ -1412,7 +1411,7 @@
def _testKeys():
#This is slow, so don't do it as part of the normal test procedure.
for dictionary in Dictionaries:
- dictionary._testKeys()
+ dictionary._testKeys()
def _test(reset=0):
import doctest, wordnet
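
Two Python 2 remnants survive the patch because wn2ooo.py never exercises them: the `__cmp__` methods on Word, Synset, Sense and Pointer (Python 3 ignores `__cmp__`, so the `<` comparisons in the doctests would need `__lt__`/`__eq__`), and the direct `cmp()` calls inside them and in `_compareInstances`, which would raise NameError now that the builtin is removed. The retained `__getslice__` methods are likewise dead code, since Python 3 routes slicing through `__getitem__`, and `enableCache()` remains broken as in upstream (it references an undefined `size` and `LRUCache` without the leading underscore). If the comparison paths are ever needed, a small shim restores the builtin for the direct calls (a sketch, not part of the patch itself):

    def cmp(a, b):
        # three-way comparison removed in Python 3: returns -1, 0 or 1
        return (a > b) - (a < b)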