author     Po-Chuan Hsieh <sunpoet@FreeBSD.org>    2022-03-25 13:32:00 +0000
committer  Po-Chuan Hsieh <sunpoet@FreeBSD.org>    2022-03-25 13:38:04 +0000
commit     c750cbac580143e48a23ed5af71ac671614b8015 (patch)
tree       23947368390016087221e9f9b685b9fc7325647b /biology/checkm
parent     f77ee7e82e76a434625ce15cdbe5f8358e1617e2 (diff)
biology/checkm: Fix build with setuptools 58.0.0+
With hat: python
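
setuptools 58.0.0 removed support for the 2to3 conversion step, so CheckM's
Python 2-era sources can no longer be translated automatically at build time;
the patch-2to3 file added below applies the equivalent conversions to the
sources directly. (dos2unix is added to USES, presumably so the upstream
files' line endings are normalized before the patch applies.) For readers
skimming the 648-line patch, it reduces to a handful of mechanical Python 2
to 3 rewrites; the following minimal sketch summarizes them with invented
identifiers and values that do not come from the CheckM sources:

    import urllib.request, urllib.error  # Python 2 spelled these urllib2 / urllib

    counts = {'contig1': 3, 'contig2': 7}

    # print statement -> print() function
    print('%d sequences' % len(counts))

    # dict.iteritems() -> dict.items(); dict.keys() now returns a view,
    # so code that needs a list (or indexing) wraps it in list()
    for seqId, n in counts.items():
        print(seqId, n)
    seqIds = list(counts.keys())

    # xrange -> range; map(lambda ...) -> list comprehension
    squares = [n * n for n in range(len(seqIds))]
    print(squares)

    # raw_input -> input; "except E, e" -> "except E as e"
    try:
        urllib.request.urlopen('http://example.invalid/', None, 5)
    except urllib.error.URLError as e:
        print('Error: failed to connect to server: %s' % e)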
Diffstat (limited to 'biology/checkm')
 -rw-r--r--  biology/checkm/Makefile            2
 -rw-r--r--  biology/checkm/files/patch-2to3  648
 2 files changed, 649 insertions, 1 deletion
diff --git a/biology/checkm/Makefile b/biology/checkm/Makefile
index 302c1ba374d0..6bb02b8266de 100644
--- a/biology/checkm/Makefile
+++ b/biology/checkm/Makefile
@@ -15,7 +15,7 @@ RUN_DEPENDS= ${PYNUMPY} \
${PYTHON_PKGNAMEPREFIX}pysam>=0.8.3:biology/py-pysam@${PY_FLAVOR} \
${PYTHON_PKGNAMEPREFIX}scipy>=0.9.0:science/py-scipy@${PY_FLAVOR}
-USES= python:3.7+
+USES= dos2unix python:3.7+
USE_GITHUB= yes
GH_ACCOUNT= Ecogenomics
USE_PYTHON= distutils noflavors autoplist
diff --git a/biology/checkm/files/patch-2to3 b/biology/checkm/files/patch-2to3
new file mode 100644
index 000000000000..04b37972c86e
--- /dev/null
+++ b/biology/checkm/files/patch-2to3
@@ -0,0 +1,648 @@
+--- checkm/binTools.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/binTools.py
+@@ -26,7 +26,7 @@ import gzip
+
+ import numpy as np
+
+-from common import binIdFromFilename, checkFileExists, readDistribution, findNearest
++from .common import binIdFromFilename, checkFileExists, readDistribution, findNearest
+ from checkm.util.seqUtils import readFasta, writeFasta, baseCount
+ from checkm.genomicSignatures import GenomicSignatures
+ from checkm.prodigal import ProdigalGeneFeatureParser
+@@ -123,34 +123,34 @@ class BinTools():
+ seqId = line[1:].split(None, 1)[0]
+
+ if seqId in seqIds:
+- print ' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId)
++ print(' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId))
+ seqIds.add(seqId)
+
+ binSeqs[binId] = seqIds
+
+ # check for sequences assigned to multiple bins
+ bDuplicates = False
+- binIds = binSeqs.keys()
+- for i in xrange(0, len(binIds)):
+- for j in xrange(i + 1, len(binIds)):
++ binIds = list(binSeqs.keys())
++ for i in range(0, len(binIds)):
++ for j in range(i + 1, len(binIds)):
+ seqInter = set(binSeqs[binIds[i]]).intersection(set(binSeqs[binIds[j]]))
+
+ if len(seqInter) > 0:
+ bDuplicates = True
+- print ' Sequences shared between %s and %s: ' % (binIds[i], binIds[j])
++ print(' Sequences shared between %s and %s: ' % (binIds[i], binIds[j]))
+ for seqId in seqInter:
+- print ' ' + seqId
+- print ''
++ print(' ' + seqId)
++ print('')
+
+ if not bDuplicates:
+- print ' No sequences assigned to multiple bins.'
++ print(' No sequences assigned to multiple bins.')
+
+ def gcDist(self, seqs):
+ """GC statistics for bin."""
+ GCs = []
+ gcTotal = 0
+ basesTotal = 0
+- for _, seq in seqs.iteritems():
++ for _, seq in seqs.items():
+ a, c, g, t = baseCount(seq)
+ gc = g + c
+ bases = a + c + g + t
+@@ -171,7 +171,7 @@ class BinTools():
+
+ codingBasesTotal = 0
+ basesTotal = 0
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ codingBases = prodigalParser.codingBases(seqId)
+
+ CDs.append(float(codingBases) / len(seq))
+@@ -186,11 +186,11 @@ class BinTools():
+ def binTetraSig(self, seqs, tetraSigs):
+ """Tetranucleotide signature for bin. """
+ binSize = 0
+- for _, seq in seqs.iteritems():
++ for _, seq in seqs.items():
+ binSize += len(seq)
+
+ bInit = True
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ weightedTetraSig = tetraSigs[seqId] * (float(len(seq)) / binSize)
+ if bInit:
+ binSig = weightedTetraSig
+@@ -247,32 +247,32 @@ class BinTools():
+ meanCD, deltaCDs, CDs = self.codingDensityDist(seqs, prodigalParser)
+
+ # find keys into GC and CD distributions
+- closestGC = findNearest(np.array(gcBounds.keys()), meanGC)
+- sampleSeqLen = gcBounds[closestGC].keys()[0]
++ closestGC = findNearest(np.array(list(gcBounds.keys())), meanGC)
++ sampleSeqLen = list(gcBounds[closestGC].keys())[0]
+ d = gcBounds[closestGC][sampleSeqLen]
+- gcLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
+- gcUpperBoundKey = findNearest(d.keys(), (100 + distribution) / 2.0)
++ gcLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
++ gcUpperBoundKey = findNearest(list(d.keys()), (100 + distribution) / 2.0)
+
+- closestCD = findNearest(np.array(cdBounds.keys()), meanCD)
+- sampleSeqLen = cdBounds[closestCD].keys()[0]
++ closestCD = findNearest(np.array(list(cdBounds.keys())), meanCD)
++ sampleSeqLen = list(cdBounds[closestCD].keys())[0]
+ d = cdBounds[closestCD][sampleSeqLen]
+- cdLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
++ cdLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
+
+- tdBoundKey = findNearest(tdBounds[tdBounds.keys()[0]].keys(), distribution)
++ tdBoundKey = findNearest(list(tdBounds[list(tdBounds.keys())[0]].keys()), distribution)
+
+ index = 0
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ seqLen = len(seq)
+
+ # find GC, CD, and TD bounds
+- closestSeqLen = findNearest(gcBounds[closestGC].keys(), seqLen)
++ closestSeqLen = findNearest(list(gcBounds[closestGC].keys()), seqLen)
+ gcLowerBound = gcBounds[closestGC][closestSeqLen][gcLowerBoundKey]
+ gcUpperBound = gcBounds[closestGC][closestSeqLen][gcUpperBoundKey]
+
+- closestSeqLen = findNearest(cdBounds[closestCD].keys(), seqLen)
++ closestSeqLen = findNearest(list(cdBounds[closestCD].keys()), seqLen)
+ cdLowerBound = cdBounds[closestCD][closestSeqLen][cdLowerBoundKey]
+
+- closestSeqLen = findNearest(tdBounds.keys(), seqLen)
++ closestSeqLen = findNearest(list(tdBounds.keys()), seqLen)
+ tdBound = tdBounds[closestSeqLen][tdBoundKey]
+
+ outlyingDists = []
+--- checkm/checkmData.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/checkmData.py
+@@ -85,11 +85,11 @@ class DBConfig(object):
+ """Work out if we have permission to write to the CheckM config before attempting to make changes"""
+ try:
+ open(self.configFile, 'a')
+- except IOError, e:
+- print "You do not seem to have permission to edit the checkm config file"
+- print "located at %s" % self.configFile
+- print "Please try again with updated privileges. Error was:\n"
+- print e
++ except IOError as e:
++ print("You do not seem to have permission to edit the checkm config file")
++ print("located at %s" % self.configFile)
++ print("Please try again with updated privileges. Error was:\n")
++ print(e)
+ return False
+ return True
+
+@@ -167,28 +167,28 @@ class DBManager(mm.ManifestManager):
+ else:
+ path = os.path.abspath(os.path.expanduser(path))
+
+- print ""
++ print("")
+ if os.path.exists(path):
+ # path exists
+ if os.access(path, os.W_OK):
+ # path is writable
+ path_set = True
+- print "Path [%s] exists and you have permission to write to this folder." % path
++ print("Path [%s] exists and you have permission to write to this folder." % path)
+ else:
+- print "Path [%s] exists but you do not have permission to write to this folder." % path
++ print("Path [%s] exists but you do not have permission to write to this folder." % path)
+ else:
+ # path does not exist, try to make it
+ "Path [%s] does not exist so I will attempt to create it" % path
+ try:
+ self.makeSurePathExists(path)
+- print "Path [%s] has been created and you have permission to write to this folder." % path
++ print("Path [%s] has been created and you have permission to write to this folder." % path)
+ path_set = True
+ except Exception:
+- print "Unable to make the folder, Error was: %s" % sys.exc_info()[0]
++ print("Unable to make the folder, Error was: %s" % sys.exc_info()[0])
+ minimal = True
+
+ # (re)make the manifest file
+- print "(re) creating manifest file (please be patient)."
++ print("(re) creating manifest file (please be patient).")
+ self.createManifest(path, self.config.values["localManifestName"])
+
+ return path
+@@ -196,8 +196,8 @@ class DBManager(mm.ManifestManager):
+ def checkPermissions(self):
+ """See if the user has permission to write to the data directory"""
+ if not os.access(self.config.values["dataRoot"], os.W_OK):
+- print "You do not seem to have permission to edit the CheckM data folder"
+- print "located at %s" % self.config.values["dataRoot"]
++ print("You do not seem to have permission to edit the CheckM data folder")
++ print("located at %s" % self.config.values["dataRoot"])
+ return False
+
+ return True
+--- checkm/coverage.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/coverage.py
+@@ -62,7 +62,7 @@ class Coverage():
+ binId = binIdFromFilename(binFile)
+
+ seqs = readFasta(binFile)
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ seqIdToBinId[seqId] = binId
+ seqIdToSeqLen[seqId] = len(seq)
+
+@@ -97,12 +97,12 @@ class Coverage():
+ print(header)
+
+ # get length of all seqs
+- for bamFile, seqIds in coverageInfo.iteritems():
+- for seqId in seqIds.keys():
++ for bamFile, seqIds in coverageInfo.items():
++ for seqId in list(seqIds.keys()):
+ seqIdToSeqLen[seqId] = seqIds[seqId].seqLen
+
+ # write coverage stats for all scaffolds to file
+- for seqId, seqLen in seqIdToSeqLen.iteritems():
++ for seqId, seqLen in seqIdToSeqLen.items():
+ rowStr = seqId + '\t' + seqIdToBinId.get(seqId, DefaultValues.UNBINNED) + '\t' + str(seqLen)
+ for bamFile in bamFiles:
+ bamId = binIdFromFilename(bamFile)
+@@ -171,7 +171,7 @@ class Coverage():
+ writeProc.join()
+ except:
+ # make sure all processes are terminated
+- print traceback.format_exc()
++ print(traceback.format_exc())
+ for p in workerProc:
+ p.terminate()
+
+@@ -271,16 +271,16 @@ class Coverage():
+ if self.logger.getEffectiveLevel() <= logging.INFO:
+ sys.stderr.write('\n')
+
+- print ''
+- print ' # total reads: %d' % totalReads
+- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
+- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
+- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
+- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
+- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
+- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
+- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
+- print ''
++ print('')
++ print(' # total reads: %d' % totalReads)
++ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
++ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
++ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
++ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
++ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
++ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
++ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
++ print('')
+
+ def parseCoverage(self, coverageFile):
+ """Read coverage information from file."""
+@@ -301,7 +301,7 @@ class Coverage():
+ if seqId not in coverageStats[binId]:
+ coverageStats[binId][seqId] = {}
+
+- for i in xrange(3, len(lineSplit), 3):
++ for i in range(3, len(lineSplit), 3):
+ bamId = lineSplit[i]
+ coverage = float(lineSplit[i + 1])
+ coverageStats[binId][seqId][bamId] = coverage
+@@ -325,7 +325,7 @@ class Coverage():
+
+ # calculate mean coverage (weighted by scaffold length)
+ # for each bin under each BAM file
+- for i in xrange(3, len(lineSplit), 3):
++ for i in range(3, len(lineSplit), 3):
+ bamId = lineSplit[i]
+ coverage = float(lineSplit[i + 1])
+ binCoverages[binId][bamId].append(coverage)
+@@ -341,13 +341,13 @@ class Coverage():
+
+ profiles = defaultdict(dict)
+ for binId in binStats:
+- for bamId, stats in binStats[binId].iteritems():
++ for bamId, stats in binStats[binId].items():
+ binLength, meanBinCoverage = stats
+ coverages = binCoverages[binId][bamId]
+
+ varCoverage = 0
+ if len(coverages) > 1:
+- varCoverage = mean(map(lambda x: (x - meanBinCoverage) ** 2, coverages))
++ varCoverage = mean([(x - meanBinCoverage) ** 2 for x in coverages])
+
+ profiles[binId][bamId] = [meanBinCoverage, sqrt(varCoverage)]
+
+--- checkm/coverageWindows.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/coverageWindows.py
+@@ -188,10 +188,10 @@ class CoverageWindows():
+ try:
+ end += windowSize
+ except:
+- print '*****************'
+- print end
+- print windowSize
+- print '******************'
++ print('*****************')
++ print(end)
++ print(windowSize)
++ print('******************')
+
+ coverage = float(sum(readLoader.coverage)) / seqLen
+
+@@ -239,13 +239,13 @@ class CoverageWindows():
+ if self.logger.getEffectiveLevel() <= logging.INFO:
+ sys.stderr.write('\n')
+
+- print ''
+- print ' # total reads: %d' % totalReads
+- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
+- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
+- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
+- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
+- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
+- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
+- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
+- print ''
++ print('')
++ print(' # total reads: %d' % totalReads)
++ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
++ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
++ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
++ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
++ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
++ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
++ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
++ print('')
+--- checkm/manifestManager.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/manifestManager.py
+@@ -47,8 +47,8 @@ __MANIFEST__ = ".dmanifest"
+ # system includes
+ import os
+ import hashlib
+-import urllib2
+-import urllib
++import urllib.request, urllib.error, urllib.parse
++import urllib.request, urllib.parse, urllib.error
+ import shutil
+ import errno
+
+@@ -121,15 +121,15 @@ class ManifestManager(object):
+ source = ""
+ # first we assume it is remote
+ try:
+- s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
++ s_man = urllib.request.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
+ source = sourceManifestLocation + "/"
+ except ValueError:
+ # then it is probably a file
+ s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
+ source = os.path.join(sourceManifestLocation) + os.path.sep
+- except urllib2.URLError:
++ except urllib.error.URLError:
+ # problems connecting to server, perhaps user is behind a proxy or firewall
+- print "Error: failed to connect to server."
++ print("Error: failed to connect to server.")
+ return (None, None, None, None, None)
+
+ first_line = True
+@@ -140,11 +140,11 @@ class ManifestManager(object):
+ # get the type of the manifest
+ s_type = self.getManType(line)
+ if s_type != l_type:
+- print "Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type)
++ print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
+ return (None, None, None, None, None)
+ else:
+ # no type specified
+- print "Error: type of source manifest is not specified. Is this a valid manifest file?"
++ print("Error: type of source manifest is not specified. Is this a valid manifest file?")
+ return (None, None, None, None, None)
+
+ self.type = l_type
+@@ -174,7 +174,7 @@ class ManifestManager(object):
+ deleted.append(fields[0])
+
+ # check for new files
+- for f in source_man.keys():
++ for f in list(source_man.keys()):
+ if source_man[f][2] == False:
+ if source_man[f][0] == '-':
+ addedDirs.append(f)
+@@ -190,28 +190,28 @@ class ManifestManager(object):
+ modified_size += int(source_man[f][1])
+
+ if len(addedFiles) > 0:
+- print "#------------------------------------------------------"
+- print "# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size))
++ print("#------------------------------------------------------")
++ print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
+ for f in addedFiles:
+- print "\t".join([self.formatData(int(source_man[f][1])), f])
++ print("\t".join([self.formatData(int(source_man[f][1])), f]))
+
+ if len(addedDirs) > 0:
+- print "#------------------------------------------------------"
+- print "# Source contains %d new folders(s)" % (len(addedDirs))
++ print("#------------------------------------------------------")
++ print("# Source contains %d new folders(s)" % (len(addedDirs)))
+ for f in addedDirs:
+- print f
++ print(f)
+
+ if len(modified) > 0:
+- print "#------------------------------------------------------"
+- print "# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size))
++ print("#------------------------------------------------------")
++ print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
+ for f in modified:
+- print f
++ print(f)
+
+ if len(deleted) > 0:
+- print "#------------------------------------------------------"
+- print "# %d files have been deleted in the source:" % len(deleted)
++ print("#------------------------------------------------------")
++ print("# %d files have been deleted in the source:" % len(deleted))
+ for f in deleted:
+- print f
++ print(f)
+ else:
+ return (source,
+ [(a, source_man[a]) for a in addedFiles],
+@@ -245,13 +245,13 @@ class ManifestManager(object):
+ for f in modified:
+ total_size += int(f[1][1])
+ if total_size != 0:
+- print "****************************************************************"
+- print "%d new file(s) to be downloaded from source" % len(added_files)
+- print "%d existing file(s) to be updated" % len(modified)
+- print "%s will need to be downloaded" % self.formatData(total_size)
++ print("****************************************************************")
++ print("%d new file(s) to be downloaded from source" % len(added_files))
++ print("%d existing file(s) to be updated" % len(modified))
++ print("%s will need to be downloaded" % self.formatData(total_size))
+ do_down = self.promptUserDownload()
+ if not do_down:
+- print "Download aborted"
++ print("Download aborted")
+
+ update_manifest = False
+ if do_down:
+@@ -262,13 +262,13 @@ class ManifestManager(object):
+ self.makeSurePathExists(full_path)
+ for add in added_files:
+ full_path = os.path.abspath(os.path.join(localManifestLocation, add[0]))
+- urllib.urlretrieve(source+add[0], full_path)
++ urllib.request.urlretrieve(source+add[0], full_path)
+ for modify in modified:
+ full_path = os.path.abspath(os.path.join(localManifestLocation, modify[0]))
+- urllib.urlretrieve(source+modify[0], full_path)
++ urllib.request.urlretrieve(source+modify[0], full_path)
+
+ if update_manifest:
+- print "(re) creating manifest file (please be patient)"
++ print("(re) creating manifest file (please be patient)")
+ self.createManifest(localManifestLocation, manifestName=localManifestName)
+
+ return True
+@@ -303,19 +303,19 @@ class ManifestManager(object):
+ input_not_ok = True
+ minimal=False
+ valid_responses = {'Y':True,'N':False}
+- vrs = ",".join([x.lower() for x in valid_responses.keys()])
++ vrs = ",".join([x.lower() for x in list(valid_responses.keys())])
+ while(input_not_ok):
+ if(minimal):
+- option = raw_input("Download? ("+vrs+") : ").upper()
++ option = input("Download? ("+vrs+") : ").upper()
+ else:
+- option = raw_input("Confirm you want to download this data\n" \
++ option = input("Confirm you want to download this data\n" \
+ "Changes *WILL* be permanent\n" \
+ "Continue? ("+vrs+") : ").upper()
+ if(option in valid_responses):
+- print "****************************************************************"
++ print("****************************************************************")
+ return valid_responses[option]
+ else:
+- print "ERROR: unrecognised choice '"+option+"'"
++ print("ERROR: unrecognised choice '"+option+"'")
+ minimal = True
+
+ def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
+--- checkm/taxonParser.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/taxonParser.py
+@@ -73,8 +73,8 @@ class TaxonParser():
+ numMarkers, numMarkerSets = markerSet.size()
+ pTable.add_row([rank, taxon, markerSet.numGenomes, numMarkers, numMarkerSets])
+
+- print ''
+- print pTable.get_string()
++ print('')
++ print(pTable.get_string())
+
+ def markerSet(self, rank, taxon, markerFile):
+ """Obtain specified taxonomic-specific marker set."""
+--- checkm/uniqueMarkers.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/uniqueMarkers.py
+@@ -51,7 +51,7 @@ def getOppositeRankSpecificTaxonId(cursor, *args):
+ query.append(' %s != \'%s\' ' % (ranks[len(args) - 1], args[-1]))
+ query.append(' %s IS NULL' % ranks[len(args)])
+ query_string = 'AND'.join(query)
+- print query_string
++ print(query_string)
+ result = cursor.execute('SELECT Id, "Count" FROM taxons WHERE %s' % query_string)
+ return result.fetchall()
+
+@@ -121,7 +121,7 @@ def doWork(args):
+ markers_from_others[Id] += count
+
+ descriptive_markers = []
+- for marker_id, _ in marker_in_taxon_mapping.items():
++ for marker_id, _ in list(marker_in_taxon_mapping.items()):
+ if marker_id in markers_from_others:
+ fraction_in_others = float(markers_from_others[marker_id]) / float(others_total_count)
+ if fraction_in_others <= args.exclude:
+@@ -135,7 +135,7 @@ def doWork(args):
+ des_markers.append(getDescriptiveMarkers(cur, i))
+
+ for des_acc, des_name in des_markers:
+- print des_acc, des_name
++ print(des_acc, des_name)
+
+ if __name__ == '__main__':
+
+--- checkm/util/img.py.orig 2022-03-15 18:25:01 UTC
++++ checkm/util/img.py
+@@ -195,7 +195,7 @@ class IMG(object):
+ genomeIdsOfInterest = set()
+ for genomeId in metadata:
+ bKeep = True
+- for r in xrange(0, len(searchTaxa)):
++ for r in range(0, len(searchTaxa)):
+ if taxonStr == 'universal':
+ bKeep = True
+ elif taxonStr == 'prokaryotes' and (metadata[genomeId]['taxonomy'][0] == 'Bacteria' or metadata[genomeId]['taxonomy'][0] == 'Archaea'):
+@@ -222,8 +222,8 @@ class IMG(object):
+
+ def lineageStats(self, metadata, mostSpecificRank):
+ stats = {}
+- for r in xrange(0, mostSpecificRank + 1):
+- for _, data in metadata.iteritems():
++ for r in range(0, mostSpecificRank + 1):
++ for _, data in metadata.items():
+ taxaStr = ';'.join(data['taxonomy'][0:r + 1])
+ stats[taxaStr] = stats.get(taxaStr, 0) + 1
+
+@@ -231,9 +231,9 @@ class IMG(object):
+
+ def lineagesSorted(self, metadata, mostSpecificRank=6):
+ lineages = []
+- for r in xrange(0, mostSpecificRank + 1):
++ for r in range(0, mostSpecificRank + 1):
+ taxa = set()
+- for _, data in metadata.iteritems():
++ for _, data in metadata.items():
+ if 'unclassified' not in data['taxonomy'][0:r + 1]:
+ taxa.add(';'.join(data['taxonomy'][0:r + 1]))
+
+@@ -274,7 +274,7 @@ class IMG(object):
+ geneIdToFamilyIds[geneId].add(clusterId)
+ count[clusterId] = count.get(clusterId, 0) + 1
+
+- for clusterId, c in count.iteritems():
++ for clusterId, c in count.items():
+ if clusterId not in table:
+ table[clusterId] = {}
+ table[clusterId][genomeId] = c
+@@ -288,7 +288,7 @@ class IMG(object):
+
+ def filterGeneCountTable(self, genomeIds, table, ubiquityThreshold=0.9, singleCopyThreshold=0.9):
+ idsToFilter = []
+- for pfamId, genomeCounts in table.iteritems():
++ for pfamId, genomeCounts in table.items():
+ ubiquity = 0
+ singleCopy = 0
+ for genomeId in genomeIds:
+@@ -342,7 +342,7 @@ class IMG(object):
+ # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
+ # with GFF entries are considered.
+ familyIdToScaffoldIds = {}
+- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
++ for pfamId, geneIds in pfamIdToGeneIds.items():
+ scaffolds = []
+ for geneId in geneIds:
+ scaffold = genePosition.get(geneId, None)
+@@ -352,7 +352,7 @@ class IMG(object):
+ if scaffolds:
+ familyIdToScaffoldIds[pfamId] = scaffolds
+
+- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
++ for tigrId, geneIds in tigrIdToGeneIds.items():
+ scaffolds = []
+ for geneId in geneIds:
+ scaffold = genePosition.get(geneId, None)
+@@ -362,9 +362,9 @@ class IMG(object):
+ if scaffold:
+ familyIdToScaffoldIds[tigrId] = scaffolds
+ except:
+- print '[BUG]: __genomeIdToClusterScaffold'
+- print sys.exc_info()[0]
+- print genomeId, geneId, tigrId, pfamId
++ print('[BUG]: __genomeIdToClusterScaffold')
++ print(sys.exc_info()[0])
++ print(genomeId, geneId, tigrId, pfamId)
+ sys.exit()
+
+ return familyIdToScaffoldIds
+@@ -400,7 +400,7 @@ class IMG(object):
+ seqs = readFasta(genomeFile)
+
+ seqLens = {}
+- for seqId, seq in seqs.iteritems():
++ for seqId, seq in seqs.items():
+ seqLens[seqId] = len(seq)
+
+ return seqLens
+@@ -462,7 +462,7 @@ class IMG(object):
+ # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
+ # with GFF entries are considered.
+ familyIdToGenomePositions = {}
+- for pfamId, geneIds in pfamIdToGeneIds.iteritems():
++ for pfamId, geneIds in pfamIdToGeneIds.items():
+ positions = []
+ for geneId in geneIds:
+ position = genePosition.get(geneId, None)
+@@ -472,7 +472,7 @@ class IMG(object):
+ if positions:
+ familyIdToGenomePositions[pfamId] = positions
+
+- for tigrId, geneIds in tigrIdToGeneIds.iteritems():
++ for tigrId, geneIds in tigrIdToGeneIds.items():
+ positions = []
+ for geneId in geneIds:
+ position = genePosition.get(geneId, None)
+@@ -482,9 +482,9 @@ class IMG(object):
+ if positions:
+ familyIdToGenomePositions[tigrId] = positions
+ except:
+- print '[BUG]: __genomeFamilyPositions'
+- print sys.exc_info()[0]
+- print genomeId, geneId, tigrId, pfamId
++ print('[BUG]: __genomeFamilyPositions')
++ print(sys.exc_info()[0])
++ print(genomeId, geneId, tigrId, pfamId)
+ sys.exit()
+
+ return familyIdToGenomePositions