Parsers available to Apache Tika

CompositeParser

Class: org.apache.tika.parser.CompositeParser

Composite Parser

DefaultParser

Class: org.apache.tika.parser.DefaultParser

Composite Parser

AppleSingleFileParser

Class: org.apache.tika.parser.apple.AppleSingleFileParser

Mime Types:

  • application/applefile

PListParser

Class: org.apache.tika.parser.apple.PListParser

Mime Types:

  • application/x-plist
  • application/x-bplist-itunes
  • application/x-bplist
  • application/x-bplist-memgraph
  • application/x-bplist-webarchive

ClassParser

Class: org.apache.tika.parser.asm.ClassParser

Mime Types:

  • application/java-vm

AudioParser

Class: org.apache.tika.parser.audio.AudioParser

Mime Types:

  • audio/vnd.wave
  • audio/x-wav
  • audio/basic
  • audio/x-aiff

MidiParser

Class: org.apache.tika.parser.audio.MidiParser

Mime Types:

  • application/x-midi
  • audio/midi

SourceCodeParser

Class: org.apache.tika.parser.code.SourceCodeParser

Mime Types:

  • text/x-c++src
  • text/x-groovy
  • text/x-java-source

Pkcs7Parser

Class: org.apache.tika.parser.crypto.Pkcs7Parser

Mime Types:

  • application/pkcs7-signature
  • application/pkcs7-mime

TSDParser

Class: org.apache.tika.parser.crypto.TSDParser

Mime Types:

  • application/timestamped-data

TextAndCSVParser

Class: org.apache.tika.parser.csv.TextAndCSVParser

Mime Types:

  • text/csv
  • text/tsv
  • text/plain

DBFParser

Class: org.apache.tika.parser.dbf.DBFParser

Mime Types:

  • application/x-dbf

DGN8Parser

Class: org.apache.tika.parser.dgn.DGN8Parser

Mime Types:

  • image/vnd.dgn; version=8

DIFParser

Class: org.apache.tika.parser.dif.DIFParser

Mime Types:

  • application/dif+xml

DWGParser

Class: org.apache.tika.parser.dwg.DWGParser

Mime Types:

  • image/vnd.dwg

EpubParser

Class: org.apache.tika.parser.epub.EpubParser

Mime Types:

  • application/x-ibooks+zip
  • application/epub+zip

ExecutableParser

Class: org.apache.tika.parser.executable.ExecutableParser

Mime Types:

  • application/x-msdownload
  • application/x-sharedlib
  • application/x-elf
  • application/x-object
  • application/x-executable
  • application/x-coredump

FeedParser

Class: org.apache.tika.parser.feed.FeedParser

Mime Types:

  • application/atom+xml
  • application/rss+xml

AdobeFontMetricParser

Class: org.apache.tika.parser.font.AdobeFontMetricParser

Mime Types:

  • application/x-font-adobe-metric

TrueTypeParser

Class: org.apache.tika.parser.font.TrueTypeParser

Mime Types:

  • application/x-font-ttf

HtmlParser

Class: org.apache.tika.parser.html.HtmlParser

Mime Types:

  • text/html
  • application/vnd.wap.xhtml+xml
  • application/x-asp
  • application/xhtml+xml

HttpParser

Class: org.apache.tika.parser.http.HttpParser

Mime Types:

  • application/x-httpresponse

HwpV5Parser

Class: org.apache.tika.parser.hwp.HwpV5Parser

Mime Types:

  • application/x-hwp-v5

BPGParser

Class: org.apache.tika.parser.image.BPGParser

Mime Types:

  • image/bpg
  • image/x-bpg

HeifParser

Class: org.apache.tika.parser.image.HeifParser

Mime Types:

  • image/heic-sequence
  • image/heif
  • image/heic
  • image/heif-sequence

ICNSParser

Class: org.apache.tika.parser.image.ICNSParser

Mime Types:

  • image/icns

ImageParser

Class: org.apache.tika.parser.image.ImageParser

Mime Types:

  • image/png
  • image/vnd.wap.wbmp
  • image/x-jbig2
  • image/bmp
  • image/x-xcf
  • image/gif
  • image/x-icon
  • image/x-ms-bmp

JXLParser

Class: org.apache.tika.parser.image.JXLParser

Mime Types:

  • image/jxl

JpegParser

Class: org.apache.tika.parser.image.JpegParser

Mime Types:

  • image/jpeg

PSDParser

Class: org.apache.tika.parser.image.PSDParser

Mime Types:

  • image/vnd.adobe.photoshop

TiffParser

Class: org.apache.tika.parser.image.TiffParser

Mime Types:

  • image/tiff

WebPParser

Class: org.apache.tika.parser.image.WebPParser

Mime Types:

  • image/webp

IDMLParser

Class: org.apache.tika.parser.indesign.IDMLParser

Mime Types:

  • application/vnd.adobe.indesign-idml-package

IptcAnpaParser

Class: org.apache.tika.parser.iptc.IptcAnpaParser

Mime Types:

  • text/vnd.iptc.anpa

IWorkPackageParser

Class: org.apache.tika.parser.iwork.IWorkPackageParser

Mime Types:

  • application/vnd.apple.keynote
  • application/vnd.apple.iwork
  • application/vnd.apple.numbers
  • application/vnd.apple.pages

IWork13PackageParser

Class: org.apache.tika.parser.iwork.iwana.IWork13PackageParser

Mime Types:

  • application/vnd.apple.numbers.13
  • application/vnd.apple.unknown.13
  • application/vnd.apple.pages.13
  • application/vnd.apple.keynote.13

IWork18PackageParser

Class: org.apache.tika.parser.iwork.iwana.IWork18PackageParser

Mime Types:

  • application/vnd.apple.pages.18
  • application/vnd.apple.keynote.18
  • application/vnd.apple.numbers.18

RFC822Parser

Class: org.apache.tika.parser.mail.RFC822Parser

Mime Types:

  • message/rfc822

MatParser

Class: org.apache.tika.parser.mat.MatParser

Mime Types:

  • application/x-matlab-data

MboxParser

Class: org.apache.tika.parser.mbox.MboxParser

Mime Types:

  • application/mbox

EMFParser

Class: org.apache.tika.parser.microsoft.EMFParser

Mime Types:

  • image/emf

JackcessParser

Class: org.apache.tika.parser.microsoft.JackcessParser

Mime Types:

  • application/x-msaccess

MSOwnerFileParser

Class: org.apache.tika.parser.microsoft.MSOwnerFileParser

Mime Types:

  • application/x-ms-owner

OfficeParser

Class: org.apache.tika.parser.microsoft.OfficeParser

Mime Types:

  • application/x-tika-msoffice-embedded; format=ole10_native
  • application/msword
  • application/vnd.visio
  • application/x-tika-ole-drm-encrypted
  • application/vnd.ms-project
  • application/x-tika-msworks-spreadsheet
  • application/x-mspublisher
  • application/vnd.ms-powerpoint
  • application/x-tika-msoffice
  • application/sldworks
  • application/x-tika-ooxml-protected
  • application/vnd.ms-excel
  • application/vnd.ms-outlook

OldExcelParser

Class: org.apache.tika.parser.microsoft.OldExcelParser

Mime Types:

  • application/vnd.ms-excel.workspace.3
  • application/vnd.ms-excel.workspace.4
  • application/vnd.ms-excel.sheet.2
  • application/vnd.ms-excel.sheet.3
  • application/vnd.ms-excel.sheet.4

TNEFParser

Class: org.apache.tika.parser.microsoft.TNEFParser

Mime Types:

  • application/vnd.ms-tnef
  • application/x-tnef
  • application/ms-tnef

WMFParser

Class: org.apache.tika.parser.microsoft.WMFParser

Mime Types:

  • image/wmf

ActiveMimeParser

Class: org.apache.tika.parser.microsoft.activemime.ActiveMimeParser

Mime Types:

  • application/x-activemime

ChmParser

Class: org.apache.tika.parser.microsoft.chm.ChmParser

Mime Types:

  • application/vnd.ms-htmlhelp
  • application/x-chm
  • application/chm

OneNoteParser

Class: org.apache.tika.parser.microsoft.onenote.OneNoteParser

Mime Types:

  • application/onenote; format=one

OOXMLParser

Class: org.apache.tika.parser.microsoft.ooxml.OOXMLParser

Mime Types:

  • application/vnd.ms-powerpoint.template.macroenabled.12
  • application/vnd.ms-excel.addin.macroenabled.12
  • application/vnd.openxmlformats-officedocument.wordprocessingml.template
  • application/vnd.ms-excel.sheet.binary.macroenabled.12
  • application/vnd.openxmlformats-officedocument.wordprocessingml.document
  • application/vnd.ms-powerpoint.slide.macroenabled.12
  • application/vnd.ms-visio.drawing
  • application/vnd.ms-powerpoint.slideshow.macroenabled.12
  • application/vnd.ms-powerpoint.presentation.macroenabled.12
  • application/vnd.openxmlformats-officedocument.presentationml.slide
  • application/vnd.ms-excel.sheet.macroenabled.12
  • application/vnd.ms-word.template.macroenabled.12
  • application/vnd.ms-word.document.macroenabled.12
  • application/vnd.ms-powerpoint.addin.macroenabled.12
  • application/vnd.openxmlformats-officedocument.spreadsheetml.template
  • application/vnd.ms-xpsdocument
  • application/vnd.ms-visio.drawing.macroenabled.12
  • application/vnd.ms-visio.template.macroenabled.12
  • model/vnd.dwfx+xps
  • application/vnd.openxmlformats-officedocument.presentationml.template
  • application/vnd.openxmlformats-officedocument.presentationml.presentation
  • application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  • application/vnd.ms-visio.stencil
  • application/vnd.ms-visio.template
  • application/vnd.openxmlformats-officedocument.presentationml.slideshow
  • application/vnd.ms-visio.stencil.macroenabled.12
  • application/vnd.ms-excel.template.macroenabled.12

Word2006MLParser

Class: org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParser

Mime Types:

  • application/vnd.ms-word2006ml

OutlookPSTParser

Class: org.apache.tika.parser.microsoft.pst.OutlookPSTParser

Mime Types:

  • application/vnd.ms-outlook-pst

RTFParser

Class: org.apache.tika.parser.microsoft.rtf.RTFParser

Mime Types:

  • application/rtf

SpreadsheetMLParser

Class: org.apache.tika.parser.microsoft.xml.SpreadsheetMLParser

Mime Types:

  • application/vnd.ms-spreadsheetml

WordMLParser

Class: org.apache.tika.parser.microsoft.xml.WordMLParser

Mime Types:

  • application/vnd.ms-wordml

MIFParser

Class: org.apache.tika.parser.mif.MIFParser

Mime Types:

  • application/x-mif
  • application/vnd.mif
  • application/x-maker

Mp3Parser

Class: org.apache.tika.parser.mp3.Mp3Parser

Mime Types:

  • audio/mpeg

MP4Parser

Class: org.apache.tika.parser.mp4.MP4Parser

Mime Types:

  • video/x-m4v
  • application/mp4
  • video/3gpp
  • video/3gpp2
  • video/quicktime
  • audio/mp4
  • video/mp4

FlatOpenDocumentParser

Class: org.apache.tika.parser.odf.FlatOpenDocumentParser

Mime Types:

  • application/vnd.oasis.opendocument.tika.flat.document
  • application/vnd.oasis.opendocument.flat.presentation
  • application/vnd.oasis.opendocument.flat.spreadsheet
  • application/vnd.oasis.opendocument.flat.text

OpenDocumentParser

Class: org.apache.tika.parser.odf.OpenDocumentParser

Mime Types:

  • application/x-vnd.oasis.opendocument.presentation
  • application/vnd.oasis.opendocument.chart
  • application/x-vnd.oasis.opendocument.text-web
  • application/x-vnd.oasis.opendocument.image
  • application/vnd.oasis.opendocument.graphics-template
  • application/vnd.oasis.opendocument.text-web
  • application/x-vnd.oasis.opendocument.spreadsheet-template
  • application/vnd.oasis.opendocument.spreadsheet-template
  • application/vnd.sun.xml.writer
  • application/x-vnd.oasis.opendocument.graphics-template
  • application/vnd.oasis.opendocument.graphics
  • application/vnd.oasis.opendocument.spreadsheet
  • application/x-vnd.oasis.opendocument.chart
  • application/x-vnd.oasis.opendocument.spreadsheet
  • application/vnd.oasis.opendocument.image
  • application/x-vnd.oasis.opendocument.text
  • application/x-vnd.oasis.opendocument.text-template
  • application/vnd.oasis.opendocument.formula-template
  • application/x-vnd.oasis.opendocument.formula
  • application/vnd.oasis.opendocument.image-template
  • application/x-vnd.oasis.opendocument.image-template
  • application/x-vnd.oasis.opendocument.presentation-template
  • application/vnd.oasis.opendocument.presentation-template
  • application/vnd.oasis.opendocument.text
  • application/vnd.oasis.opendocument.text-template
  • application/vnd.oasis.opendocument.chart-template
  • application/x-vnd.oasis.opendocument.chart-template
  • application/x-vnd.oasis.opendocument.formula-template
  • application/x-vnd.oasis.opendocument.text-master
  • application/vnd.oasis.opendocument.presentation
  • application/x-vnd.oasis.opendocument.graphics
  • application/vnd.oasis.opendocument.formula
  • application/vnd.oasis.opendocument.text-master

PDFParser

Class: org.apache.tika.parser.pdf.PDFParser

Mime Types:

  • application/pdf

CompressorParser

Class: org.apache.tika.parser.pkg.CompressorParser

Mime Types:

  • application/zlib
  • application/x-gzip
  • application/x-bzip2
  • application/x-compress
  • application/x-java-pack200
  • application/x-lzma
  • application/deflate64
  • application/x-lz4
  • application/x-snappy
  • application/x-brotli
  • application/gzip
  • application/x-bzip
  • application/x-xz

PackageParser

Class: org.apache.tika.parser.pkg.PackageParser

Mime Types:

  • application/x-tar
  • application/java-archive
  • application/x-arj
  • application/x-archive
  • application/zip
  • application/x-cpio
  • application/x-tika-unix-dump
  • application/x-7z-compressed

RarParser

Class: org.apache.tika.parser.pkg.RarParser

Mime Types:

  • application/x-rar-compressed

PRTParser

Class: org.apache.tika.parser.prt.PRTParser

Mime Types:

  • application/x-prt

SAS7BDATParser

Class: org.apache.tika.parser.sas.SAS7BDATParser

Mime Types:

  • application/x-sas-data

TMXParser

Class: org.apache.tika.parser.tmx.TMXParser

Mime Types:

  • application/x-tmx

FLVParser

Class: org.apache.tika.parser.video.FLVParser

Mime Types:

  • video/x-flv

WACZParser

Class: org.apache.tika.parser.wacz.WACZParser

Mime Types:

  • application/x-wacz

WARCParser

Class: org.apache.tika.parser.warc.WARCParser

Mime Types:

  • application/warc
  • application/warc+gz

QuattroProParser

Class: org.apache.tika.parser.wordperfect.QuattroProParser

Mime Types:

  • application/x-quattro-pro; version=9

WordPerfectParser

Class: org.apache.tika.parser.wordperfect.WordPerfectParser

Mime Types:

  • application/vnd.wordperfect; version=5.1
  • application/vnd.wordperfect; version=5.0
  • application/vnd.wordperfect; version=6.x

XLIFF12Parser

Class: org.apache.tika.parser.xliff.XLIFF12Parser

Mime Types:

  • application/x-xliff+xml

XLZParser

Class: org.apache.tika.parser.xliff.XLZParser

Mime Types:

  • application/x-xliff+zip

DcXMLParser

Class: org.apache.tika.parser.xml.DcXMLParser

Mime Types:

  • application/xml
  • image/svg+xml

FictionBookParser

Class: org.apache.tika.parser.xml.FictionBookParser

Mime Types:

  • application/x-fictionbook+xml

FlacParser

Class: org.gagravarr.tika.FlacParser

Mime Types:

  • audio/x-oggflac
  • audio/x-flac

OggParser

Class: org.gagravarr.tika.OggParser

Mime Types:

  • audio/ogg
  • application/kate
  • application/ogg
  • video/daala
  • video/x-ogguvs
  • video/x-ogm
  • audio/x-oggpcm
  • video/ogg
  • video/x-dirac
  • video/x-oggrgb
  • video/x-oggyuv

OpusParser

Class: org.gagravarr.tika.OpusParser

Mime Types:

  • audio/opus
  • audio/ogg; codecs=opus

SpeexParser

Class: org.gagravarr.tika.SpeexParser

Mime Types:

  • audio/ogg; codecs=speex
  • audio/speex

TheoraParser

Class: org.gagravarr.tika.TheoraParser

Mime Types:

  • video/theora

VorbisParser

Class: org.gagravarr.tika.VorbisParser

Mime Types:

  • audio/vorbis

OOXMLParser

Class: org.apache.tika.parser.microsoft.ooxml.OOXMLParser

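Mime Types: none listed for this entry (the types handled by this class appear under the earlier OOXMLParser entry)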

PDFParser

Class: org.apache.tika.parser.pdf.PDFParser

Mime Types: