author     Po-Chuan Hsieh <sunpoet@FreeBSD.org>    2022-03-25 13:32:00 +0000
committer  Po-Chuan Hsieh <sunpoet@FreeBSD.org>    2022-03-25 13:38:04 +0000
commit     c750cbac580143e48a23ed5af71ac671614b8015 (patch)
tree       23947368390016087221e9f9b685b9fc7325647b /biology/checkm
parent     f77ee7e82e76a434625ce15cdbe5f8358e1617e2 (diff)
biology/checkm: Fix build with setuptools 58.0.0+
With hat: python
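
setuptools 58.0.0 removed support for the 2to3 conversion step, so CheckM's
Python 2-era sources can no longer be translated automatically at build time;
the patch-2to3 file added below applies the equivalent conversions to the
sources directly. (dos2unix is added to USES, presumably so the upstream
files' line endings are normalized before the patch applies.) For readers
skimming the 648-line patch, it reduces to a handful of mechanical Python 2
to 3 rewrites; the following minimal sketch summarizes them with invented
identifiers and values that do not come from the CheckM sources:

    import urllib.request, urllib.error  # Python 2 spelled these urllib2 / urllib

    counts = {'contig1': 3, 'contig2': 7}

    # print statement -> print() function
    print('%d sequences' % len(counts))

    # dict.iteritems() -> dict.items(); dict.keys() now returns a view,
    # so code that needs a list (or indexing) wraps it in list()
    for seqId, n in counts.items():
        print(seqId, n)
    seqIds = list(counts.keys())

    # xrange -> range; map(lambda ...) -> list comprehension
    squares = [n * n for n in range(len(seqIds))]
    print(squares)

    # raw_input -> input; "except E, e" -> "except E as e"
    try:
        urllib.request.urlopen('http://example.invalid/', None, 5)
    except urllib.error.URLError as e:
        print('Error: failed to connect to server: %s' % e)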
Diffstat (limited to 'biology/checkm')
 -rw-r--r--  biology/checkm/Makefile            2
 -rw-r--r--  biology/checkm/files/patch-2to3  648
 2 files changed, 649 insertions, 1 deletion
diff --git a/biology/checkm/Makefile b/biology/checkm/Makefile
index 302c1ba374d0..6bb02b8266de 100644
--- a/biology/checkm/Makefile
+++ b/biology/checkm/Makefile
@@ -15,7 +15,7 @@ RUN_DEPENDS= ${PYNUMPY} \
${PYTHON_PKGNAMEPREFIX}pysam>=0.8.3:biology/py-pysam@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}scipy>=0.9.0:science/py-scipy@${PY_FLAVOR}
-USES= python:3.7+
+USES= dos2unix python:3.7+
USE_GITHUB= yes
GH_ACCOUNT= Ecogenomics
USE_PYTHON= distutils noflavors autoplist
diff --git a/biology/checkm/files/patch-2to3 b/biology/checkm/files/patch-2to3
new file mode 100644
index 000000000000..04b37972c86e
--- /dev/null
+++ b/biology/checkm/files/patch-2to3
@@ -0,0 +1,648 @@
+--- checkm/binTools.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/binTools.py
+@@ -26,7 +26,7 @@ import gzip
+
+ import numpy as np
+
+-from common import binIdFromFilename, checkFileExists, readDistribution, findNearest
++from .common import binIdFromFilename, checkFileExists, readDistribution, findNearest
+ from checkm.util.seqUtils import readFasta, writeFasta, baseCount
+ from checkm.genomicSignatures import GenomicSignatures
+ from checkm.prodigal import ProdigalGeneFeatureParser
+@@ -123,34 +123,34 @@ class BinTools():
+ seqId = line[1:].split(None, 1)[0]
+
+ if seqId in seqIds:
+- print ' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId)
++ print(' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId))
+ seqIds.add(seqId)
+
+ binSeqs[binId] = seqIds
+
+ # check for sequences assigned to multiple bins
+ bDuplicates = False
+- binIds = binSeqs.keys()
+- for i in xrange(0, len(binIds)):
+- for j in xrange(i + 1, len(binIds)):
++ binIds = list(binSeqs.keys())
++ for i in range(0, len(binIds)):
++ for j in range(i + 1, len(binIds)):
+ seqInter = set(binSeqs[binIds[i]]).intersection(set(binSeqs[binIds[j]]))
+
+ if len(seqInter) > 0:
+ bDuplicates = True
+- print ' Sequences shared between %s and %s: ' % (binIds[i], binIds[j])
++ print(' Sequences shared between %s and %s: ' % (binIds[i], binIds[j]))
+ for seqId in seqInter:
+- print ' ' + seqId
+- print ''
++ print(' ' + seqId)
++ print('')
+
+ if not bDuplicates:
+- print ' No sequences assigned to multiple bins.'
++ print(' No sequences assigned to multiple bins.')
+
+ def gcDist(self, seqs):
+ """GC statistics for bin."""
+ GCs = []
+ gcTotal = 0
+ basesTotal = 0
+- for _, seq in seqs.iteritems():
++ for _, seq in seqs.items():
+ a, c, g, t = baseCount(seq)
+ gc = g + c
+ bases = a + c + g + t
+@@ -171,7 +171,7 @@ class BinTools():
+
+ codingBasesTotal = 0
+ basesTotal = 0
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ codingBases = prodigalParser.codingBases(seqId)
+
+ CDs.append(float(codingBases) / len(seq))
+@@ -186,11 +186,11 @@ class BinTools():
+ def binTetraSig(self, seqs, tetraSigs):
+ """Tetranucleotide signature for bin. """
+ binSize = 0
+- for _, seq in seqs.iteritems():
++ for _, seq in seqs.items():
+ binSize += len(seq)
+
+ bInit = True
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ weightedTetraSig = tetraSigs[seqId] * (float(len(seq)) / binSize)
+ if bInit:
+ binSig = weightedTetraSig
+@@ -247,32 +247,32 @@ class BinTools():
+ meanCD, deltaCDs, CDs = self.codingDensityDist(seqs, prodigalParser)
+
+ # find keys into GC and CD distributions
+- closestGC = findNearest(np.array(gcBounds.keys()), meanGC)
+- sampleSeqLen = gcBounds[closestGC].keys()[0]
++ closestGC = findNearest(np.array(list(gcBounds.keys())), meanGC)
++ sampleSeqLen = list(gcBounds[closestGC].keys())[0]
+ d = gcBounds[closestGC][sampleSeqLen]
+- gcLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
+- gcUpperBoundKey = findNearest(d.keys(), (100 + distribution) / 2.0)
++ gcLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
++ gcUpperBoundKey = findNearest(list(d.keys()), (100 + distribution) / 2.0)
+
+- closestCD = findNearest(np.array(cdBounds.keys()), meanCD)
+- sampleSeqLen = cdBounds[closestCD].keys()[0]
++ closestCD = findNearest(np.array(list(cdBounds.keys())), meanCD)
++ sampleSeqLen = list(cdBounds[closestCD].keys())[0]
+ d = cdBounds[closestCD][sampleSeqLen]
+- cdLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
++ cdLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
+
+- tdBoundKey = findNearest(tdBounds[tdBounds.keys()[0]].keys(), distribution)
++ tdBoundKey = findNearest(list(tdBounds[list(tdBounds.keys())[0]].keys()), distribution)
+
+ index = 0
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ seqLen = len(seq)
+
+ # find GC, CD, and TD bounds
+- closestSeqLen = findNearest(gcBounds[closestGC].keys(), seqLen)
++ closestSeqLen = findNearest(list(gcBounds[closestGC].keys()), seqLen)
+ gcLowerBound = gcBounds[closestGC][closestSeqLen][gcLowerBoundKey]
+ gcUpperBound = gcBounds[closestGC][closestSeqLen][gcUpperBoundKey]
+
+- closestSeqLen = findNearest(cdBounds[closestCD].keys(), seqLen)
++ closestSeqLen = findNearest(list(cdBounds[closestCD].keys()), seqLen)
+ cdLowerBound = cdBounds[closestCD][closestSeqLen][cdLowerBoundKey]
+
+- closestSeqLen = findNearest(tdBounds.keys(), seqLen)
++ closestSeqLen = findNearest(list(tdBounds.keys()), seqLen)
+ tdBound = tdBounds[closestSeqLen][tdBoundKey]
+
+ outlyingDists = []
+--- checkm/checkmData.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/checkmData.py
+@@ -85,11 +85,11 @@ class DBConfig(object):
+ """Work out if we have permission to write to the CheckM config before attempting to make changes"""
+ try:
+ open(self.configFile, 'a')
+- except IOError, e:
+- print "You do not seem to have permission to edit the checkm config file"
+- print "located at %s" % self.configFile
+- print "Please try again with updated privileges. Error was:\n"
+- print e
++ except IOError as e:
++ print("You do not seem to have permission to edit the checkm config file")
++ print("located at %s" % self.configFile)
++ print("Please try again with updated privileges. Error was:\n")
++ print(e)
+ return False
+ return True
+
+@@ -167,28 +167,28 @@ class DBManager(mm.ManifestManager):
+ else:
+ path = os.path.abspath(os.path.expanduser(path))
+
+- print ""
++ print("")
+ if os.path.exists(path):
+ # path exists
+ if os.access(path, os.W_OK):
+ # path is writable
+ path_set = True
+- print "Path [%s] exists and you have permission to write to this folder." % path
++ print("Path [%s] exists and you have permission to write to this folder." % path)
+ else:
+- print "Path [%s] exists but you do not have permission to write to this folder." % path
++ print("Path [%s] exists but you do not have permission to write to this folder." % path)
+ else:
+ # path does not exist, try to make it
+ "Path [%s] does not exist so I will attempt to create it" % path
+ try:
+ self.makeSurePathExists(path)
+- print "Path [%s] has been created and you have permission to write to this folder." % path
++ print("Path [%s] has been created and you have permission to write to this folder." % path)
+ path_set = True
+ except Exception:
+- print "Unable to make the folder, Error was: %s" % sys.exc_info()[0]
++ print("Unable to make the folder, Error was: %s" % sys.exc_info()[0])
+ minimal = True
+
+ # (re)make the manifest file
+- print "(re) creating manifest file (please be patient)."
++ print("(re) creating manifest file (please be patient).")
+ self.createManifest(path, self.config.values["localManifestName"])
+
+ return path
+@@ -196,8 +196,8 @@ class DBManager(mm.ManifestManager):
+ def checkPermissions(self):
+ """See if the user has permission to write to the data directory"""
+ if not os.access(self.config.values["dataRoot"], os.W_OK):
+- print "You do not seem to have permission to edit the CheckM data folder"
+- print "located at %s" % self.config.values["dataRoot"]
++ print("You do not seem to have permission to edit the CheckM data folder")
++ print("located at %s" % self.config.values["dataRoot"])
+ return False
+
+ return True
+--- checkm/coverage.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/coverage.py
+@@ -62,7 +62,7 @@ class Coverage():
+ binId = binIdFromFilename(binFile)
+
+ seqs = readFasta(binFile)
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ seqIdToBinId[seqId] = binId
+ seqIdToSeqLen[seqId] = len(seq)
+
+@@ -97,12 +97,12 @@ class Coverage():
+ print(header)
+
+ # get length of all seqs
+- for bamFile, seqIds in coverageInfo.iteritems():
+- for seqId in seqIds.keys():
++ for bamFile, seqIds in coverageInfo.items():
++ for seqId in list(seqIds.keys()):
+ seqIdToSeqLen[seqId] = seqIds[seqId].seqLen
+
+ # write coverage stats for all scaffolds to file
+- for seqId, seqLen in seqIdToSeqLen.iteritems():
++ for seqId, seqLen in seqIdToSeqLen.items():
+ rowStr = seqId + '\t' + seqIdToBinId.get(seqId, DefaultValues.UNBINNED) + '\t' + str(seqLen)
+ for bamFile in bamFiles:
+ bamId = binIdFromFilename(bamFile)
+@@ -171,7 +171,7 @@ class Coverage():
+ writeProc.join()
+ except:
+ # make sure all processes are terminated
+- print traceback.format_exc()
++ print(traceback.format_exc())
+ for p in workerProc:
+ p.terminate()
+
+@@ -271,16 +271,16 @@ class Coverage():
+ if self.logger.getEffectiveLevel() <= logging.INFO:
+ sys.stderr.write('\n')
+
+- print ''
+- print ' # total reads: %d' % totalReads
+- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
+- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
+- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
+- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
+- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
+- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
+- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
+- print ''
++ print('')
++ print(' # total reads: %d' % totalReads)
++ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
++ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
++ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
++ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
++ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
++ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
++ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
++ print('')
+
+ def parseCoverage(self, coverageFile):
+ """Read coverage information from file."""
+@@ -301,7 +301,7 @@ class Coverage():
+ if seqId not in coverageStats[binId]:
+ coverageStats[binId][seqId] = {}
+
+- for i in xrange(3, len(lineSplit), 3):
++ for i in range(3, len(lineSplit), 3):
+ bamId = lineSplit[i]
+ coverage = float(lineSplit[i + 1])
+ coverageStats[binId][seqId][bamId] = coverage
+@@ -325,7 +325,7 @@ class Coverage():
+
+ # calculate mean coverage (weighted by scaffold length)
+ # for each bin under each BAM file
+- for i in xrange(3, len(lineSplit), 3):
++ for i in range(3, len(lineSplit), 3):
+ bamId = lineSplit[i]
+ coverage = float(lineSplit[i + 1])
+ binCoverages[binId][bamId].append(coverage)
+@@ -341,13 +341,13 @@ class Coverage():
+
+ profiles = defaultdict(dict)
+ for binId in binStats:
+- for bamId, stats in binStats[binId].iteritems():
++ for bamId, stats in binStats[binId].items():
+ binLength, meanBinCoverage = stats
+ coverages = binCoverages[binId][bamId]
+
+ varCoverage = 0
+ if len(coverages) > 1:
+- varCoverage = mean(map(lambda x: (x - meanBinCoverage) ** 2, coverages))
++ varCoverage = mean([(x - meanBinCoverage) ** 2 for x in coverages])
+
+ profiles[binId][bamId] = [meanBinCoverage, sqrt(varCoverage)]
+
+--- checkm/coverageWindows.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/coverageWindows.py
+@@ -188,10 +188,10 @@ class CoverageWindows():
+ try:
+ end += windowSize
+ except:
+- print '*****************'
+- print end
+- print windowSize
+- print '******************'
++ print('*****************')
++ print(end)
++ print(windowSize)
++ print('******************')
+
+ coverage = float(sum(readLoader.coverage)) / seqLen
+
+@@ -239,13 +239,13 @@ class CoverageWindows():
+ if self.logger.getEffectiveLevel() <= logging.INFO:
+ sys.stderr.write('\n')
+
+- print ''
+- print ' # total reads: %d' % totalReads
+- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
+- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
+- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
+- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
+- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
+- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
+- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
+- print ''
++ print('')
++ print(' # total reads: %d' % totalReads)
++ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
++ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
++ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
++ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
++ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
++ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
++ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
++ print('')
+--- checkm/manifestManager.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/manifestManager.py
+@@ -47,8 +47,8 @@ __MANIFEST__ = ".dmanifest"
+ # system includes
+ import os
+ import hashlib
+-import urllib2
+-import urllib
++import urllib.request, urllib.error, urllib.parse
++import urllib.request, urllib.parse, urllib.error
+ import shutil
+ import errno
+
+@@ -121,15 +121,15 @@ class ManifestManager(object):
+ source = ""
+ # first we assume it is remote
+ try:
+- s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
++ s_man = urllib.request.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
+ source = sourceManifestLocation + "/"
+ except ValueError:
+ # then it is probably a file
+ s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
+ source = os.path.join(sourceManifestLocation) + os.path.sep
+- except urllib2.URLError:
++ except urllib.error.URLError:
+ # problems connecting to server, perhaps user is behind a proxy or firewall
+- print "Error: failed to connect to server."
++ print("Error: failed to connect to server.")
+ return (None, None, None, None, None)
+
+ first_line = True
+@@ -140,11 +140,11 @@ class ManifestManager(object):
+ # get the type of the manifest
+ s_type = self.getManType(line)
+ if s_type != l_type:
+- print "Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type)
++ print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
+ return (None, None, None, None, None)
+ else:
+ # no type specified
+- print "Error: type of source manifest is not specified. Is this a valid manifest file?"
++ print("Error: type of source manifest is not specified. Is this a valid manifest file?")
+ return (None, None, None, None, None)
+
+ self.type = l_type
+@@ -174,7 +174,7 @@ class ManifestManager(object):
+ deleted.append(fields[0])
+
+ # check for new files
+- for f in source_man.keys():
++ for f in list(source_man.keys()):
+ if source_man[f][2] == False:
+ if source_man[f][0] == '-':
+ addedDirs.append(f)
+@@ -190,28 +190,28 @@ class ManifestManager(object):
+ modified_size += int(source_man[f][1])
+
+ if len(addedFiles) > 0:
+- print "#------------------------------------------------------"
+- print "# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size))
++ print("#------------------------------------------------------")
++ print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
+ for f in addedFiles:
+- print "\t".join([self.formatData(int(source_man[f][1])), f])
++ print("\t".join([self.formatData(int(source_man[f][1])), f]))
+
+ if len(addedDirs) > 0:
+- print "#------------------------------------------------------"
+- print "# Source contains %d new folders(s)" % (len(addedDirs))
++ print("#------------------------------------------------------")
++ print("# Source contains %d new folders(s)" % (len(addedDirs)))
+ for f in addedDirs:
+- print f
++ print(f)
+
+ if len(modified) > 0:
+- print "#------------------------------------------------------"
+- print "# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size))
++ print("#------------------------------------------------------")
++ print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
+ for f in modified:
+- print f
++ print(f)
+
+ if len(deleted) > 0:
+- print "#------------------------------------------------------"
+- print "# %d files have been deleted in the source:" % len(deleted)
++ print("#------------------------------------------------------")
++ print("# %d files have been deleted in the source:" % len(deleted))
+ for f in deleted:
+- print f
++ print(f)
+ else:
+ return (source,
+ [(a, source_man[a]) for a in addedFiles],
+@@ -245,13 +245,13 @@ class ManifestManager(object):
+ for f in modified:
+ total_size += int(f[1][1])
+ if total_size != 0:
+- print "****************************************************************"
+- print "%d new file(s) to be downloaded from source" % len(added_files)
+- print "%d existing file(s) to be updated" % len(modified)
+- print "%s will need to be downloaded" % self.formatData(total_size)
++ print("****************************************************************")
++ print("%d new file(s) to be downloaded from source" % len(added_files))
++ print("%d existing file(s) to be updated" % len(modified))
++ print("%s will need to be downloaded" % self.formatData(total_size))
+ do_down = self.promptUserDownload()
+ if not do_down:
+- print "Download aborted"
++ print("Download aborted")
+
+ update_manifest = False
+ if do_down:
+@@ -262,13 +262,13 @@ class ManifestManager(object):
+ self.makeSurePathExists(full_path)
+ for add in added_files:
+ full_path = os.path.abspath(os.path.join(localManifestLocation, add[0]))
+- urllib.urlretrieve(source+add[0], full_path)
++ urllib.request.urlretrieve(source+add[0], full_path)
+ for modify in modified:
+ full_path = os.path.abspath(os.path.join(localManifestLocation, modify[0]))
+- urllib.urlretrieve(source+modify[0], full_path)
++ urllib.request.urlretrieve(source+modify[0], full_path)
+
+ if update_manifest:
+- print "(re) creating manifest file (please be patient)"
++ print("(re) creating manifest file (please be patient)")
+ self.createManifest(localManifestLocation, manifestName=localManifestName)
+
+ return True
+@@ -303,19 +303,19 @@ class ManifestManager(object):
+ input_not_ok = True
+ minimal=False
+ valid_responses = {'Y':True,'N':False}
+- vrs = ",".join([x.lower() for x in valid_responses.keys()])
++ vrs = ",".join([x.lower() for x in list(valid_responses.keys())])
+ while(input_not_ok):
+ if(minimal):
+- option = raw_input("Download? ("+vrs+") : ").upper()
++ option = input("Download? ("+vrs+") : ").upper()
+ else:
+- option = raw_input("Confirm you want to download this data\n" \
++ option = input("Confirm you want to download this data\n" \
+ "Changes *WILL* be permanent\n" \
+ "Continue? ("+vrs+") : ").upper()
+ if(option in valid_responses):
+- print "****************************************************************"
++ print("****************************************************************")
+ return valid_responses[option]
+ else:
+- print "ERROR: unrecognised choice '"+option+"'"
++ print("ERROR: unrecognised choice '"+option+"'")
+ minimal = True
+
+ def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
+--- checkm/taxonParser.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/taxonParser.py
+@@ -73,8 +73,8 @@ class TaxonParser():
+ numMarkers, numMarkerSets = markerSet.size()
+ pTable.add_row([rank, taxon, markerSet.numGenomes, numMarkers, numMarkerSets])
+
+- print ''
+- print pTable.get_string()
++ print('')
++ print(pTable.get_string())
+
+ def markerSet(self, rank, taxon, markerFile):
+ """Obtain specified taxonomic-specific marker set."""
+--- checkm/uniqueMarkers.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/uniqueMarkers.py
+@@ -51,7 +51,7 @@ def getOppositeRankSpecificTaxonId(cursor, *args):
+ query.append(' %s != \'%s\' ' % (ranks[len(args) - 1], args[-1]))
+ query.append(' %s IS NULL' % ranks[len(args)])
+ query_string = 'AND'.join(query)
+- print query_string
++ print(query_string)
+ result = cursor.execute('SELECT Id, "Count" FROM taxons WHERE %s' % query_string)
+ return result.fetchall()
+
+@@ -121,7 +121,7 @@ def doWork(args):
+ markers_from_others[Id] += count
+
+ descriptive_markers = []
+- for marker_id, _ in marker_in_taxon_mapping.items():
++ for marker_id, _ in list(marker_in_taxon_mapping.items()):
+ if marker_id in markers_from_others:
+ fraction_in_others = float(markers_from_others[marker_id]) / float(others_total_count)
+ if fraction_in_others <= args.exclude:
+@@ -135,7 +135,7 @@ def doWork(args):
+ des_markers.append(getDescriptiveMarkers(cur, i))
+
+ for des_acc, des_name in des_markers:
+- print des_acc, des_name
++ print(des_acc, des_name)
+
+ if __name__ == '__main__':
+
+--- checkm/util/img.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/util/img.py
+@@ -195,7 +195,7 @@ class IMG(object):
+ genomeIdsOfInterest = set()
+ for genomeId in metadata:
+ bKeep = True
+- for r in xrange(0, len(searchTaxa)):
++ for r in range(0, len(searchTaxa)):
+ if taxonStr == 'universal':
+ bKeep = True
+ elif taxonStr == 'prokaryotes' and (metadata[genomeId]['taxonomy'][0] == 'Bacteria' or metadata[genomeId]['taxonomy'][0] == 'Archaea'):
+@@ -222,8 +222,8 @@ class IMG(object):
+
+ def lineageStats(self, metadata, mostSpecificRank):
+ stats = {}
+- for r in xrange(0, mostSpecificRank + 1):
+- for _, data in metadata.iteritems():
++ for r in range(0, mostSpecificRank + 1):
++ for _, data in metadata.items():
+ taxaStr = ';'.join(data['taxonomy'][0:r + 1])
+ stats[taxaStr] = stats.get(taxaStr, 0) + 1
+
+@@ -231,9 +231,9 @@ class IMG(object):
+
+ def lineagesSorted(self, metadata, mostSpecificRank=6):
+ lineages = []
+- for r in xrange(0, mostSpecificRank + 1):
++ for r in range(0, mostSpecificRank + 1):
+ taxa = set()
+- for _, data in metadata.iteritems():
++ for _, data in metadata.items():
+ if 'unclassified' not in data['taxonomy'][0:r + 1]:
+ taxa.add(';'.join(data['taxonomy'][0:r + 1]))
+
+@@ -274,7 +274,7 @@ class IMG(object):
+ geneIdToFamilyIds[geneId].add(clusterId)
+ count[clusterId] = count.get(clusterId, 0) + 1
+
+- for clusterId, c in count.iteritems():
++ for clusterId, c in count.items():
+ if clusterId not in table:
+ table[clusterId] = {}
+ table[clusterId][genomeId] = c
+@@ -288,7 +288,7 @@ class IMG(object):
+
+ def filterGeneCountTable(self, genomeIds, table, ubiquityThreshold=0.9, singleCopyThreshold=0.9):
+ idsToFilter = []
+- for pfamId, genomeCounts in table.iteritems():
++ for pfamId, genomeCounts in table.items():
+ ubiquity = 0
+ singleCopy = 0
+ for genomeId in genomeIds:
+@@ -342,7 +342,7 @@ class IMG(object):
+ # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
+ # with GFF entries are considered.
+ familyIdToScaffoldIds = {}
+- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
++ for pfamId, geneIds in pfamIdToGeneIds.items():
+ scaffolds = []
+ for geneId in geneIds:
+ scaffold = genePosition.get(geneId, None)
+@@ -352,7 +352,7 @@ class IMG(object):
+ if scaffolds:
+ familyIdToScaffoldIds[pfamId] = scaffolds
+
+- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
++ for tigrId, geneIds in tigrIdToGeneIds.items():
+ scaffolds = []
+ for geneId in geneIds:
+ scaffold = genePosition.get(geneId, None)
+@@ -362,9 +362,9 @@ class IMG(object):
+ if scaffold:
+ familyIdToScaffoldIds[tigrId] = scaffolds
+ except:
+- print '[BUG]: __genomeIdToClusterScaffold'
+- print sys.exc_info()[0]
+- print genomeId, geneId, tigrId, pfamId
++ print('[BUG]: __genomeIdToClusterScaffold')
++ print(sys.exc_info()[0])
++ print(genomeId, geneId, tigrId, pfamId)
+ sys.exit()
+
+ return familyIdToScaffoldIds
+@@ -400,7 +400,7 @@ class IMG(object):
+ seqs = readFasta(genomeFile)
+
+ seqLens = {}
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ seqLens[seqId] = len(seq)
+
+ return seqLens
+@@ -462,7 +462,7 @@ class IMG(object):
+ # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
+ # with GFF entries are considered.
+ familyIdToGenomePositions = {}
+- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
++ for pfamId, geneIds in pfamIdToGeneIds.items():
+ positions = []
+ for geneId in geneIds:
+ position = genePosition.get(geneId, None)
+@@ -472,7 +472,7 @@ class IMG(object):
+ if positions:
+ familyIdToGenomePositions[pfamId] = positions
+
+- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
++ for tigrId, geneIds in tigrIdToGeneIds.items():
+ positions = []
+ for geneId in geneIds:
+ position = genePosition.get(geneId, None)
+@@ -482,9 +482,9 @@ class IMG(object):
+ if positions:
+ familyIdToGenomePositions[tigrId] = positions
+ except:
+- print '[BUG]: __genomeFamilyPositions'
+- print sys.exc_info()[0]
+- print genomeId, geneId, tigrId, pfamId
++ print('[BUG]: __genomeFamilyPositions')
++ print(sys.exc_info()[0])
++ print(genomeId, geneId, tigrId, pfamId)
+ sys.exit()
+
+ return familyIdToGenomePositions