aboutsummaryrefslogtreecommitdiff
path: root/contrib/file/magic/Magdir/archive
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/file/magic/Magdir/archive')
-rw-r--r--contrib/file/magic/Magdir/archive574
1 files changed, 549 insertions, 25 deletions
diff --git a/contrib/file/magic/Magdir/archive b/contrib/file/magic/Magdir/archive
index fb535ac0ff26..6e1f9678e7ac 100644
--- a/contrib/file/magic/Magdir/archive
+++ b/contrib/file/magic/Magdir/archive
@@ -1,5 +1,5 @@
#------------------------------------------------------------------------------
-# $File: archive,v 1.162 2022/05/27 21:27:59 christos Exp $
+# $File: archive,v 1.193 2023/07/27 17:55:58 christos Exp $
# archive: file(1) magic for archive formats (see also "msdos" for self-
# extracting compressed archives)
#
@@ -25,7 +25,18 @@
>>>>>>155 ubyte&0xDF =0
# space or ascii digit 0 at start of check sum
>>>>>>>148 ubyte&0xEF =0x20
->>>>>>>>0 use tar-file
+# FOR DEBUGGING:
+#>>>>>>>>0 regex \^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp) NAME "%s"
+# check for 1st image main name with digits used for sorting
+# and for name extension case insensitive like: PNG JPG JPEG TIF TIFF GIF BMP
+>>>>>>>>0 regex \^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp)
+>>>>>>>>>0 use tar-cbt
+# check for 1st member name with ovf suffix
+>>>>>>>>0 regex \^.{1,96}[.](ovf)
+>>>>>>>>>0 use tar-ova
+# if the 1st member name is neither a digit-named image file nor an *.ovf file then it is a plain TAR archive
+>>>>>>>>0 default x
+>>>>>>>>>0 use tar-file
# minimal check and then display tar archive information which can also be
# embedded inside others like Android Backup, Clam AntiVirus database
0 name tar-file
@@ -146,6 +157,34 @@
>>508 default x
# padding[255] in old tar sometimes comment field
>>>257 string >\0 \b, comment: %-.40s
+# Summary: Comic Book Archive *.CBT with TAR format
+# URL: https://en.wikipedia.org/wiki/Comic_book_archive
+# http://fileformats.archiveteam.org/wiki/Comic_Book_Archive
+# Note: there exist also RAR, ZIP, ACE and 7Z packed variants
+0 name tar-cbt
+>0 string x Comic Book archive, tar archive
+#!:mime application/x-tar
+!:mime application/vnd.comicbook
+#!:mime application/vnd.comicbook+tar
+!:ext cbt
+# name[100] probably like: 19.jpg 0001.png 0002.png
+# or maybe like ComicInfo.xml
+>0 string >\0 \b, 1st image %-.60s
+# Summary: Open Virtualization Format *.OVF with disk images and more packed as TAR archive *.OVA
+# From: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/Open_Virtualization_Format
+# http://fileformats.archiveteam.org/wiki/OVF_(Open_Virtualization_Format)
+# Reference: http://mark0.net/download/triddefs_xml.7z/defs/o/ova.trid.xml
+# Note: called "Open Virtualization Format package" by TrID
+# assuming *.ovf comes first
+0 name tar-ova
+>0 string x Open Virtualization Format Archive
+#!:mime application/x-ustar
+# http://extension.nirsoft.net/ova
+!:mime application/x-virtualbox-ova
+!:ext ova
+# assuming name[100] like: DOS-0.9.ovf FreeDOS_1.ovf Win98SE_DE.ovf
+>0 string >\0 \b, with %-.60s
# Incremental snapshot gnu-tar format from:
# https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
@@ -163,16 +202,88 @@
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
-0 short 070707 cpio archive
+# URL: http://fileformats.archiveteam.org/wiki/Cpio
+# https://en.wikipedia.org/wiki/Cpio
+# Reference: https://people.freebsd.org/~kientzle/libarchive/man/cpio.5.txt
+# Update: Joerg Jenderek
+#
+# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin.trid.xml
+# Note: called "CPIO archive (binary)" by TrID, "cpio/Binary LE" by 7-Zip and "CPIO" by DROID via PUID fmt/635
+0 short 070707
+# skip DROID fmt-635-signature-id-960.cpio by looking for pathname of 1st entry
+>26 string >\0 cpio archive
!:mime application/x-cpio
+# https://download.opensuse.org/distribution/leap/15.4/iso/openSUSE-Leap-15.4-NET-x86_64-Media.iso
+# boot/x86_64/loader/bootlogo
+# message.cpi
+!:ext /cpio/cpi
+>>0 use cpio-bin
+# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin-sw.trid.xml
+# Note: called "CPIO archive (byte swapped binary)" by TrID and "Cpio/Binary BE" by 7-Zip
0 short 0143561 byte-swapped cpio archive
!:mime application/x-cpio # encoding: swapped
+# https://telparia.com/fileFormatSamples/archive/cpio/skeleton2.cpio
+!:ext cpio
+>0 use cpio-bin-be
+# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio.trid.xml
+# Note: called "CPIO archive (portable)" by TrID, "cpio/Portable ASCII" by 7-Zip and "cpio/odc" by GNU cpio
0 string 070707 ASCII cpio archive (pre-SVR4 or odc)
!:mime application/x-cpio
+# https://telparia.com/fileFormatSamples/archive/cpio/ pthreads-1.60B5.osr5src.cpio cinema.cpi VOL.000.008 VOL.000.012
+!:ext cpio/cpi/008/012
+# Note: called "CPIO archive (portable)" by TrID, "cpio/New ASCII" by 7-Zip and "cpio/newc" by GNU cpio
0 string 070701 ASCII cpio archive (SVR4 with no CRC)
!:mime application/x-cpio
+# https://telparia.com/fileFormatSamples/archive/cpio/MainActor-2.06.3.cpio
+!:ext cpio
+# Note: called "CPIO archive (portable)" by TrID, "cpio/New CRC" by 7-Zip and "cpio/crc" by GNU cpio
0 string 070702 ASCII cpio archive (SVR4 with CRC)
!:mime application/x-cpio
+# http://ftp.gnu.org/gnu/tar/tar-1.27.cpio.gz
+# https://telparia.com/fileFormatSamples/archive/cpio/pcmcia
+!:ext /cpio
+# display information of old binary cpio archive
+# Note: verified by 7-Zip `7z l -tcpio -slt *.cpio` and
+# `cpio -ivt --numeric-uid-gid --file=clam.bin-le.cpio`
+0 name cpio-bin
+# c_dev; device number; WHAT IS THAT?
+>2 uleshort x \b; device %u
+# c_ino; truncated inode number; use `ls --inode`
+>4 uleshort x \b, inode %u
+# c_mode; mode specifies permissions and file type like: ?622~?rw-r--r-- by `ls -l`
+>6 uleshort x \b, mode %o
+# c_uid; numeric user id; use `ls --numeric-uid-gid`
+>8 uleshort x \b, uid %u
+# c_gid; numeric group id
+>10 uleshort x \b, gid %u
+# c_nlink; links to this file; directories at least 2
+>12 uleshort >1 \b, %u links
+# c_rdev; device number for block and character entries; zero for all other entries by writers
+# like 0x0440 for /dev/ttyS0
+>14 uleshort >0 \b, device %#4.4x
+# c_mtime[2]; modification time in seconds since 1 January 1970; most-significant 16 bits first
+>16 medate x \b, modified %s
+# c_filesize[2]; size of file; most-significant 16 bits first like: 544
+>22 melong x \b, %u bytes
+# c_namesize; bytes in the pathname that follows the header like: 9
+#>20 uleshort x \b, namesize %u
+# pathname of entry like: "clam.exe"
+>26 string x "%s"
+# display information of old binary byte swapped cpio archive
+# Note: verified by 7-Zip `7z l -tcpio -slt *.cpio` and
+# `LANGUAGE=C cpio -ivt --numeric-uid-gid --file=clam.bin-be.cpio`
+0 name cpio-bin-be
+>2 ubeshort x \b; device %u
+>4 ubeshort x \b, inode %u
+>6 ubeshort x \b, mode %o
+>8 ubeshort x \b, uid %u
+>10 ubeshort x \b, gid %u
+>12 ubeshort >1 \b, %u links
+>14 ubeshort >0 \b, device %#4.4x
+>16 bedate x \b, modified %s
+>22 ubelong x \b, %u bytes
+#>20 ubeshort x \b, namesize %u
+>26 string x "%s"
#
# Various archive formats used by various versions of the "ar"
@@ -249,7 +360,8 @@
#>>68 string x (format %.3s)
>68 string =2.0\n
# 2nd archive name=control archive name like control.tar.gz or control.tar.xz
->>72 string >\0 \b, with %.14s
+# or control.tar.zst
+>>72 string >\0 \b, with %.15s
# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
>>0 search/0x93e4f data.tar. \b, data compression
# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
@@ -484,11 +596,12 @@
>>>>0 use ttcomp
0 string \1\4
# TODO:
-# skip Commodore PET BASIC 4.0 program *.prg
-# variant ASCII, 1K dictionary (strength=48=50-2). With strength=49 wrong order! WHY?
# skip shared library (strength=50) handled by ./ibm6000
!:strength -2
->0 use ttcomp
+# skip Commodore PET BASIC programs (Mastermind.prg) with last 3 nil bytes (\0~end of line followed by 0000h line offset)
+#>-4 ubelong x LAST_BYTES=%8.8x
+>-4 ubelong&0x00FFffFF !0
+>>0 use ttcomp
# display information of TTComp archive
0 name ttcomp
# (version 5.25) labeled the entry as "TTComp archive data"
@@ -731,6 +844,88 @@
!:ext ??$
>>8 ulelong >0 \b, original size: %u bytes
+# Summary: lzss compressed/EDI Pack
+# From: Joerg Jenderek
+# URL: http://fileformats.archiveteam.org/wiki/EDI_Install_packed_file
+# Note: called "EDI Install LZS compressed data" by TrID and verified by
+# command like `deark -l -m edi_pack -d2 BOOK01A.IC$` as "EDI Pack LZSS1"
+0 string EDILZSS
+>7 string 1
+# look for point character before original file name extension
+>>8 search/9/b .
+# check suffix of possible original file name
+#>>>&0 ubelong x SUFFIX=%8.8x
+# samples without valid character after point in original file name field like: FENNEL.LZS PLANTAIN.LZS
+>>>&0 ubyte <0x20
+>>>>0 use edi-lzs
+# samples with valid character after point in original file name field
+>>>&0 ubyte >0x1F
+# check 2nd character of suffix
+#>>>>&0 ubyte x 2ND_SUFFIX=%x
+# sample with one valid character after point followed by \0 in original file name field like: SPELMATE.H$
+>>>>&0 ubyte =0
+>>>>>0 use edi-pack
+>>>>&0 ubyte >0x1F
+# check 3rd character of suffix
+#>>>>>&0 ubyte x 3RD_SUFFIX=%x
+# no sample with 2 valid characters after point followed by \0 in original file name field
+>>>>>&0 ubyte =0
+>>>>>>0 use edi-pack
+# samples with valid 3rd character after point in original file name field
+>>>>>&0 ubyte >0x1F
+# sample with 3 valid characters after point followed by \0 in original file name field like: BOOK01A.IC$ CTL3D.DL$
+>>>>>>&0 ubyte =0
+>>>>>>>0 use edi-pack
+# sample with 3 valid characters after point followed by no \0 in original file name field like: HERBTEXT.LZS
+>>>>>>&0 ubyte !0
+>>>>>>>0 use edi-lzs
+# no sample with invalid 3rd character after point in original file name field
+>>>>>&0 default x
+>>>>>>0 use edi-lzs
+# sample with invalid 2nd character after point in original file name field like: LACERATE.LZS SPLINTER.LZS
+>>>>&0 default x
+>>>>>0 use edi-lzs
+# sample without point character in original file name field like GUNSHOT.LZS
+>>8 default x
+>>>0 use edi-lzs
+# Reference: http://mark0.net/download/triddefs_xml.7z/defs/e/edi-lzss2.trid.xml
+# Note: called "EDI Install Pro LZSS2 compressed data" by TrID and verified by
+# command like `deark -l -m edi_pack -d2 4WAY.WA$` as "EDI Pack LZSS2"
+>7 string 2 EDI LZSS2 packed
+#!:mime application/octet-stream
+!:mime application/x-edi-pack-lzss
+# the name of a compressed file often ends in character '$' or '_'
+!:ext ??$/??_
+# original filename, NUL-terminated, padded to 13 bytes like: mci.vbx 4way.wav skymap.exe cmdialog.vbx
+>>8 string x "%-0.13s"
+# original file size, as a 4-byte integer.
+>>21 ulelong x \b, %u bytes
+# compressed data like: ff5249464606ec00 ff4d5aa601010000
+>>>25 ubequad x \b, data %#16.16llx...
+0 name edi-pack
+# Note: verified by command like `deark -l -d2 SPELMATE.H$` as "EDI Pack LZSS1"
+# original filename, NUL-terminated, padded to 13 bytes like: ctl3d.dll spelmate.h filemenu.rc owl.def index-it.exe
+# but not like \377Aloe.lzs\273 (HERBTEXT.LZS)
+>8 string x EDI LZSS packed "%-.13s"
+#!:mime application/octet-stream
+!:mime application/x-edi-pack-lzss
+# the name of a compressed file often ends in character '$' or '_'
+!:ext ??$/?$
+# compressed data like: f7000001eff02020 ff4d5aa900020000 ff2f2a207370656c
+>21 ubequad x \b, data %#16.16llx...
+# URL: http://fileformats.archiveteam.org/wiki/EDI_LZSSLib
+# Note: verified partly by command like `deark -l -m edi_pack -d2 GUNSHOT.LZS` as "EDI LZSSLib"
+0 name edi-lzs
+# Note: verified by command like `deark -l -d2 GUNSHOT.LZS` as "EDI LZSSLib"
+# no original filename looks like: \277BM\226.\0 \277BM.n\001 \277BM\226.\0 \277BM.g\001 \377Aloe.lzs\273
+>8 string x EDI LZSSLib packed
+#!:mime application/octet-stream
+!:mime application/x-edi-pack-lzss
+# The name of a compressed file ends with LZS suffix
+!:ext lzs
+# compressed data like: bf424df6e10100f3 ff416c6f652e6c7a ff416c6f652e6c7a
+>8 ubequad x \b, data %#16.16llx...
+
# Summary: CAZIP compressed file
# From: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/CAZIP
@@ -769,8 +964,6 @@
3 string OctSqu Squash archive data
# Terse
0 string \5\1\1\0 Terse archive data
-# PUCrunch
-0 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
# UHarc
0 string UHA UHarc archive data
# ABComp
@@ -799,8 +992,10 @@
# QFC
0 string \x1aFC\x1a QFC archive data
0 string \x1aQF\x1a QFC archive data
-# PRO-PACK
-0 string RNC PRO-PACK archive data
+# PRO-PACK https://www.segaretro.org/Rob_Northen_compression
+0 string RNC
+>3 byte 1 PRO-PACK archive data (compression 1)
+>3 byte 2 PRO-PACK archive data (compression 2)
# 777
0 string 777 777 archive data
# LZS221
@@ -903,11 +1098,39 @@
# TPac
0 string \4TPAC\3 TPac archive data
# Ai
+# Update: Joerg Jenderek
+# URL: http://fileformats.archiveteam.org/wiki/Ai_Archiver
0 string Ai\1\1\0 Ai archive data
+#!:mime application/octet-stream
+!:mime application/x-compress-ai
+!:ext ai
0 string Ai\1\0\0 Ai archive data
+#!:mime application/octet-stream
+!:mime application/x-compress-ai
+!:ext ai
# Ai32
+# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ai.trid.xml
+# Note: called "Ai Archivator compressed archive" by TrID
0 string Ai\2\0 Ai32 archive data
+#!:mime application/octet-stream
+!:mime application/x-compress-ai
+!:ext ai
+# original file name
+>8 pstring/h x "%s"
+# according to TrID the next 3 bytes are nil
+>5 ubyte !0 \b, at 5 %#x
+>6 ubyte !0 \b, at 6 %#x
+>7 ubyte !0 \b, at 7 %#x
+# the fourth byte with value 0 is probably a flag for "non solid" mode
+#>3 ubyte =0x00 \b, unsolid mode
0 string Ai\2\1 Ai32 archive data
+#!:mime application/octet-stream
+!:mime application/x-compress-ai
+!:ext ai
+# original file name
+>8 pstring/h x "%s"
+# the fourth byte with value 0x01 is probably a flag for "solid" mode; this is not the default
+>3 ubyte =0x01 \b, solid mode
# SBC
0 string SBC SBC archive data
# Ybs
@@ -1212,7 +1435,7 @@
>>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data
# LHice archiver use ".ICE" as name extension instead usual one ".lzh"
# FOOBAR archiver use ".foo" as name extension instead usual one
-# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
+# "Florian Orjanov's and Olga Bachetska's ARchiver" not found at the moment
>>>>>>>2 string -lh1 \b
!:ext lha/lzh/ice
>>>>>>3 regex \^lh[23d] LHa 2.x? archive data
@@ -1400,6 +1623,83 @@
!:mime application/zip
!:ext zip/cbz
+# Android APK file (Zip archive)
+0 string PK\003\004
+!:strength +1
+# Starts with AndroidManifest.xml (file name length = 19)
+>26 uleshort 19
+>>30 string AndroidManifest.xml Android package (APK), with AndroidManifest.xml
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>-22 string PK\005\006
+>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+# Starts with META-INF/com/android/build/gradle/app-metadata.properties
+>26 uleshort 57
+>>30 string META-INF/com/android/build/gradle/
+>>>&0 string app-metadata.properties Android package (APK), with gradle app-metadata.properties
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>>-22 string PK\005\006
+>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+# Starts with classes.dex (file name length = 11)
+>26 uleshort 11
+>>30 string classes.dex Android package (APK), with classes.dex
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>-22 string PK\005\006
+>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+# Starts with META-INF/MANIFEST.MF (file name length = 20)
+# NB: checks for resources.arsc, classes.dex, etc. as well to avoid matching JAR files
+>26 uleshort 20
+>>30 string META-INF/MANIFEST.MF
+# Contains resources.arsc (near the end, in the central directory)
+>>>-512 search resources.arsc Android package (APK), with MANIFEST.MF and resources.arsc
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>>-22 string PK\005\006
+>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+>>>-512 default x
+# Contains classes.dex (near the end, in the central directory)
+>>>>-512 search classes.dex Android package (APK), with MANIFEST.MF and classes.dex
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>>>-22 string PK\005\006
+>>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+>>>>-512 default x
+# Contains lib/armeabi (near the end, in the central directory)
+>>>>>-512 search lib/armeabi Android package (APK), with MANIFEST.MF and armeabi lib
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>>>>-22 string PK\005\006
+>>>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+>>>>>-512 default x
+# Contains drawables (near the end, in the central directory)
+>>>>>>-512 search res/drawable Android package (APK), with MANIFEST.MF and drawables
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>>>>>-22 string PK\005\006
+>>>>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+# It may or may not be an APK file, but it's definitely a Java JAR file
+>>>>>>-512 default x Java archive data (JAR)
+!:mime application/java-archive
+!:ext jar
+# Starts with zipflinger virtual entry (28 + 104 = 132 bytes)
+# See https://github.com/obfusk/apksigcopier/blob/666f5b7/apksigcopier/__init__.py#L230
+>4 string \x00\x00\x00\x00\x00\x00
+>>&0 string \x21\x08\x21\x02
+>>>&0 string \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00
+>>>>&0 string \x00\x00 Android package (APK), with zipflinger virtual entry
+!:mime application/vnd.android.package-archive
+!:ext apk
+>>>>>-22 string PK\005\006
+>>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block
+# APK Signing Block
+>0 default x
+>>-22 string PK\005\006
+>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 Android package (APK), with APK Signing Block
+!:mime application/vnd.android.package-archive
+!:ext apk
+
# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0 string PK\005\006 Zip archive data (empty)
!:mime application/zip
@@ -1502,9 +1802,13 @@
>>>>77 string -web HTML Document Template
!:mime application/vnd.oasis.opendocument.text-web
!:ext oth
->>>>77 string -master Master Document
+>>>>77 string -master
+>>>>>84 byte !0x2d Master Document
!:mime application/vnd.oasis.opendocument.text-master
!:ext odm
+>>>>>84 string -template Master Template
+!:mime application/vnd.oasis.opendocument.text-master-template
+!:ext otm
>>>73 string graphics
>>>>81 byte !0x2d Drawing
!:mime application/vnd.oasis.opendocument.graphics
@@ -1547,8 +1851,7 @@
# Valid for LibreOffice Base 6.0.1.1 at least
>>>73 string base Database
# https://bugs.documentfoundation.org/show_bug.cgi?id=45854
-!:mime application/vnd.oasis.opendocument.database
-#!:mime application/vnd.oasis.opendocument.base
+!:mime application/vnd.oasis.opendocument.base
!:ext odb
>>>73 string image
>>>>78 byte !0x2d Image
@@ -1564,6 +1867,16 @@
>>50 string epub+zip EPUB document
!:mime application/epub+zip
+# From: Hajin Jang <jb6804@naver.com>
+# hwpx (OWPML) document format follows OCF specification.
+# Hangul Word Processor 2010+ supports HWPX format.
+# URL: https://www.hancom.com/etc/hwpDownload.do
+# https://standard.go.kr/KSCI/standardIntro/getStandardSearchView.do?menuId=503&topMenuId=502&ksNo=KSX6101
+# https://e-ks.kr/streamdocs/view/sd;streamdocsId=72059197557727331
+>>50 string hwp+zip Hancom HWP (Hangul Word Processor) file, HWPX
+!:mime application/x-hwp+zip
+!:ext hwpx
+
# From: Joerg Jenderek
# URL: http://en.wikipedia.org/wiki/CorelDRAW
# NOTE: version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based
@@ -1617,9 +1930,10 @@
>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
!:mime application/zip
-# Java Jar files
+# Java Jar files (see also APK files above)
>(26.s+30) leshort 0xcafe Java archive data (JAR)
!:mime application/java-archive
+!:ext jar
# iOS App
>(26.s+30) leshort !0xcafe
@@ -1652,16 +1966,116 @@
>8 belong x \b, size %d
# Zoo archiver
-20 lelong 0xfdc4a7dc Zoo archive data
+# Update: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/Zoo_(file_format)
+# http://fileformats.archiveteam.org/wiki/Zoo
+# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-zoo-strict.trid.xml
+# http://distcache.freebsd.org/ports-distfiles/zoo-2.10pl1.tar.gz/zoo.h
+# Note: called "ZOO compressed archive (strict)" by TrID and "ZOO Compressed Archive" by DROID via PUID x-fmt/269
+# verified by command like `deark -m zoo -l -d2 WHRCGA.ZOO`
+20 lelong 0xfdc4a7dc
+# skip DROID x-fmt-269-signature-id-621.zoo by looking for valid major version to manipulate archive
+>32 byte >0 Zoo archive data
!:mime application/x-zoo
->4 byte >48 \b, v%c.
->>6 byte >47 \b%c
->>>7 byte >47 \b%c
->32 byte >0 \b, modify: v%d
->>33 byte x \b.%d+
->42 lelong 0xfdc4a7dc \b,
->>70 byte >0 extract: v%d
->>>71 byte x \b.%d+
+# bak is extension of backup-ed zoo
+!:ext zoo/bak
+# version in text form like: 1.50 2.00 2.10
+>>4 byte >48 \b, v%c.
+>>>6 byte >47 \b%c
+>>>>7 byte >47 \b%c
+# ZOO files typically start with "ZOO ?.?? Archive.", followed by the bytes 0x1a 0x0 0x0; not used by Zoo and they may be anything
+>>8 string !\040Archive.\032 \b, at 8
+>>>8 string x text "%0.10s"
+# major_ver.minor_ver; minimum version needed to manipulate archive like: 1.0 2.0
+>>32 byte >0 \b, modify: v%d
+>>>33 byte x \b.%d+
+# major_ver.minor_ver; minimum version needed to extract after modify like in old versions
+>>(24.l+28) ubyte x \b, extract: v%u
+>>(24.l+29) ubyte x \b.%u+
+# with zoo 2.00 additional fields have been added in the archive header
+>>32 byte >1
+# type; type of archive header like: 1 2
+>>>34 ubyte !1 \b, header type %u
+# acmt_pos; position of archive comment like: 6258 30599 61369 149501
+>>>35 lelong >0 \b, at %d
+# acmt_len; length of archive comment like: 258
+>>>>39 uleshort x %u bytes comment
+#>>>>(35.l) ubequad x COMMENT=%16.16llx
+# 1st character of comment maybe is CarriageReturn (0x0d)
+>>>>(35.l) ubyte <040
+# 2nd character of comment maybe is LineFeed (0x0a)
+>>>>>(35.l+1) ubyte <040
+# comment string after CRLF like "Anonymous ftp site garbo.uwasa.fi 128.214.87.1 moderated by"
+>>>>>>(35.l+2) string x %s
+# next character of remaining comment maybe is CarriageReturn (0x0d)
+>>>>>>>&0 ubyte <040
+>>>>>>>>&0 ubyte <040
+# 2nd comment part like: Timo Salmi ts@chyde.uwasa.fi PC directories and uploads\015\012Harri Valkama hv@chyde.uwasa.fi PC, Mac, Unix files, and upload
+>>>>>>>>>&0 string >037 %s
+# vdata; archive-level versioning byte like: 1 3
+>>>41 ubyte !1 \b, vdata %#x
+# zoo_start; pointer to 1st entry header
+>>24 lelong x \b; at %u
+# zoo_minus; zoo_start -1 for consistency checking
+#>>28 lelong x \b, zoo_minus %#x
+# zoo_tag; tag for check
+#>>(24.l+0) ulelong !0xfdc4a7dc \b, zoo_tag=%8.8x
+# type; type of directory entry like: 1 2
+>>(24.l+4) ubyte !2 type=%u
+# packing_method; 0~no packing 1~normal LZW 2~lzh
+>>(24.l+5) ubyte x method=
+>>>(24.l+5) ubyte 0 \bnot-compressed
+>>>(24.l+5) ubyte 1 \blzd
+>>>(24.l+5) ubyte 2 \blzh
+# next; position of next directory entry
+>>(24.l+6) ulelong x \b, next entry at %u
+# offset; position of file data for this entry
+#>>(24.l+10) ulelong x \b, data at %u
+# file_crc; CRC-16 of file data
+>>(24.l+18) uleshort x \b, CRC %#4.4x
+# comment; zero if none or points to entry comment like ADD9h (WHRCGA.ZOO)
+>>(24.l+32) lelong >0 \b, at %#x
+# cmt_size; if not 0 for none then length of entry comment like: 46
+>>>(24.l+36) uleshort >0 %u bytes comment
+# entry comment itself like: "CGA .GL file showing menu input from keyboard"
+>>>>(&-6.l) string x "%s"
+# org_size; original size of file
+>>(24.l+20) ulelong x \b, size %u
+# size_now; compressed size of file
+>>(24.l+24) ulelong x (%u compressed)
+# major_ver.minor_ver; minimum version needed to extract already done
+# deleted; will be 1 if deleted, 0 if not
+>>(24.l+30) ubyte =1 \b, deleted
+# struc; file structure if any; WHAT IS THAT?
+>>(24.l+31) ubyte !0 \b, structured
+# fname[13]; short/DOS file name like 12345678.012
+>>(24.l+38) string x \b, %0.13s
+# for directory entry type 2 with variable part
+>>(24.l+4) ubyte =2
+# var_dir_len; length of variable part of dir entry
+>>>(24.l+51) uleshort >0
+#>>>(24.l+51) uleshort >0 \b, variable part length %u
+# namlen; length of long filename
+#>>>>(24.l+56) ubyte x \b, namlen %u
+# dirlen; length of directory name
+#>>>>(24.l+57) ubyte x \b, dirlen %u
+# if file length positive then show long file name
+>>>>(24.l+56) ubyte >0
+# lfname[256]; long file name \0-terminated
+>>>>>(24.l+58) string x "%s"
+# if directory length positive then jump before file name field and then jump this additional length plus 2 (\0-terminator + dirlen field) to following directory name
+>>>>(24.l+57) ubyte >0
+>>>>>(24.l+55) ubyte x
+# dirname[256]; directory name \0-terminated
+>>>>>>&(&0.b+2) string x in "%s"
+# dir_crc; CRC of directory entry
+#>>>(24.l+54) uleshort x \b, entry CRC %#4.4x
+# tz; timezone where file was archived; 7Fh~unknown 4~1.00hoursWestOfUTC 12 16 20~5.00hoursWestOfUTC -107~26.75hoursEastOfUTC -4~1.00hoursEastOfUTC
+>>>(24.l+53) byte !0x7f \b, time zone %d/4
+# date; last mod file date in DOS format
+>>>(24.l+14) lemsdosdate x \b, modified %s
+# time; last mod file time in DOS format
+>>>(24.l+16) lemsdostime x %s
# Shell archives
10 string #\ This\ is\ a\ shell\ archive shell archive text
@@ -1724,13 +2138,17 @@
# Felix von Leitner <felix-file@fefe.de>
0 string d8:announce BitTorrent file
!:mime application/x-bittorrent
+!:ext torrent
# Durval Menezes, <jmgthbfile at durval dot com>
0 string d13:announce-list BitTorrent file
!:mime application/x-bittorrent
+!:ext torrent
0 string d7:comment BitTorrent file
!:mime application/x-bittorrent
+!:ext torrent
0 string d4:info BitTorrent file
!:mime application/x-bittorrent
+!:ext torrent
# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
# URL: http://fileformats.archiveteam.org/wiki/MSA_(Magic_Shadow_Archiver)
@@ -1763,6 +2181,19 @@
!:mime application/zip
!:ext zip/cbz
+# Recognize ZIP archives with prepended data by end-of-central-directory record
+# https://en.wikipedia.org/wiki/ZIP_(file_format)#End_of_central_directory_record_(EOCD)
+# by Michal Gorny <mgorny@gentoo.org>
+-2 uleshort 0
+>&-22 string PK\005\006
+# without #!
+>>0 string !#! Zip archive, with extra data prepended
+!:mime application/zip
+!:ext zip/cbz
+# with #!
+>>0 string/w #!\ a
+>>>&-1 string/T x %s script executable (Zip archive)
+
# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
# by Stefan `Sec` Zehl <sec@42.org>
7 string **ACE** ACE archive data
@@ -2007,7 +2438,28 @@
>3 byte x version %d
# LyNX archive
+# Update: Joerg Jenderek
+# URL: http://fileformats.archiveteam.org/wiki/Lynx_archive
+# Reference: http://ist.uwaterloo.ca/~schepers/formats/LNX.TXT
+# http://mark0.net/download/triddefs_xml.7z/defs/a/ark-lnx.trid.xml
+# Note: called "Lynx archive" by TrID and "Commodore C64 BASIC program" with "POKE 53280" by ./c64
+# TODO: merge and unify with Commodore C64 BASIC program
56 string USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE LyNX archive
+# display "Lynx archive" (strength=330) before Commodore C64 BASIC program (strength=50) handled by ./c64
+#!:strength +0
+#!:mime application/octet-stream
+!:mime application/x-commodore-lnx
+!:ext lnx
+# afterwards look for BASIC tokenized GOTO (89h) 10, line terminator \0, end of program tag \0\0 and CarriageReturn
+>86 search/10 \x8910\0\0\0\r \b,
+# for DEBUGGING
+#>>&0 string x STRING="%s"
+# number in ASCII of directory blocks with spaces on both sides like: 1 2 3 5
+>>&0 regex [0-9]{1,5} %s directory blocks
+# signature like: "*LYNX XII BY WILL CORLEY" " LYNX IX BY WILL CORLEY" "*LYNX BY CBMCONVERT 2.0*"
+>>>&2 regex [^\r]{1,24} \b, signature "%s"
+# number of files in ASCII surrounded by spaces and delimited by CR like: 2 3 6 13 69 144 (maximum?)
+>>>>&1 regex [0-9]{1,3} \b, %s files
# From: Joerg Jenderek
# URL: https://www.acronis.com/
@@ -2040,6 +2492,7 @@
# https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
-4 string STOP
>-16 string XPAKSTOP Gentoo binary package (XPAK)
+!:mime application/vnd.gentoo.xpak
# From: Joerg Jenderek
# URL: https://kodi.wiki/view/TexturePacker
@@ -2081,3 +2534,74 @@
# URL: http://mattmahoney.net/dc/#paq9a
# Note: Line 1186 of paq9a.cpp gives the magic bytes
0 string pQ9\001 PAQ9A archive
+
+# From wof (wof@stachelkaktus.net)
+0 string Unison\ archive\ format Unison archive format
+
+# https://ankiweb.net
+30 string collection.anki2 Anki APKG file
+#!:ext .apkg
+
+# Synology archive (DiskStation Manager 7.0+)
+# From: Alexandre Iooss <erdnaxe@crans.org>
+# Note: These archives are signed and encrypted.
+0 ulelong&0xFFFFFF00 0xEFBEAD00
+# MessagePack header (fixarray of 5 elements starting with a bin of 32 bytes)
+>8 ulelong&0x00FFFFFF 0x20C495 Synology archive
+!:ext spk
+# Extract some properties from MessagePack third item
+>>43 search/0x10000 package=
+>>>&0 string x \b, package %s
+>>43 search/0x10000 arch=
+>>>&0 string x %s
+>>43 search/0x10000 version=
+>>>&0 string x %s
+>>43 search/0x10000 create_time=
+>>>&0 string x \b, created on %s
+
+# MonoGame/XNA processed assets archive
+# From: Alexandre Iooss <erdnaxe@crans.org>
+# URL: https://github.com/MonoGame/MonoGame/blob/v3.8.1/MonoGame.Framework/Content/ContentManager.cs
+0 string XNB
+# XNB must be version 4 or 5
+>4 byte <6
+>>4 byte >3
+# Size must be positive
+>>>6 lelong >0 MonoGame/XNA processed assets
+!:ext xnb
+>>>>3 string =w \b, for Windows
+>>>>3 string =x \b, for Xbox360
+>>>>3 string =i \b, for iOS
+>>>>3 string =a \b, for Android
+>>>>3 string =d \b, for DesktopGL
+>>>>3 string =X \b, for MacOSX
+>>>>3 string =W \b, for WindowsStoreApp
+>>>>3 string =n \b, for NativeClient
+>>>>3 string =M \b, for WindowsPhone8
+>>>>3 string =r \b, for RaspberryPi
+>>>>3 string =P \b, for PlayStation4
+>>>>3 string =5 \b, for PlayStation5
+>>>>3 string =O \b, for XboxOne
+>>>>3 string =S \b, for Nintendo Switch
+>>>>3 string =G \b, for Google Stadia
+>>>>3 string =b \b, for WebAssembly and Bridge.NET
+>>>>3 string =m \b, for WindowsPhone7.0 (XNA)
+>>>>3 string =p \b, for PlayStationMobile
+>>>>3 string =v \b, for PSVita
+>>>>3 string =g \b, for Windows (OpenGL)
+>>>>3 string =l \b, for Linux
+>>>>4 byte x \b, version %d
+>>>>5 byte &0x80 \b, LZX compressed
+>>>>>10 lelong x \b, decompressed size: %d bytes
+>>>>5 byte &0x40 \b, LZ4 compressed
+>>>>>10 lelong x \b, decompressed size: %d bytes
+
+# Electron ASAR archive
+# From: Alexandre Iooss <erdnaxe@crans.org>
+# URL: https://github.com/electron/asar
+0 ulelong 4
+# Match JSON header start and end
+>16 string {"files":{"
+>>(12.l+12) string }}}} Electron ASAR archive
+!:ext asar
+>>>12 ulelong x \b, header length: %d bytes