//======================================================================== // // pdfdetach.cc // // Copyright 2010 Glyph & Cog, LLC // //======================================================================== //======================================================================== // // Modified under the Poppler project - http://poppler.freedesktop.org // // All changes made under the Poppler project to this file are licensed // under GPL version 2 or later // // Copyright (C) 2011 Carlos Garcia Campos // Copyright (C) 2013 Yury G. Kudryashov // Copyright (C) 2014, 2017 Adrian Johnson // Copyright (C) 2018, 2020, 2022, 2024 Albert Astals Cid // Copyright (C) 2018 Adam Reichold // Copyright (C) 2019, 2021, 2024 Oliver Sander // Copyright (C) 2020 // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git // //======================================================================== #include "config.h" #include #include #include "goo/gmem.h" #include "parseargs.h" #include "Annot.h" #include "GlobalParams.h" #include "Page.h" #include "PDFDoc.h" #include "PDFDocFactory.h" #include "FileSpec.h" #include "CharTypes.h" #include "Catalog.h" #include "UnicodeMap.h" #include "PDFDocEncoding.h" #include "Error.h" #include "UTF.h" #include "Win32Console.h" #include static bool doList = false; static int saveNum = 0; static char saveFile[128] = ""; static bool saveAll = false; static char savePath[1024] = ""; static char textEncName[128] = ""; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; static bool printVersion = false; static bool printHelp = false; static const ArgDesc argDesc[] = { { "-list", argFlag, &doList, 0, "list all embedded files" }, { "-save", argInt, &saveNum, 0, "save the specified embedded file (file number)" }, { "-savefile", argString, &saveFile, sizeof(saveFile), "save the specified embedded file (file name)" }, { "-saveall", argFlag, &saveAll, 0, "save all embedded files" }, { "-o", argString, savePath, sizeof(savePath), "file name for the saved embedded file" }, { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" }, { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" }, { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" }, { "-v", argFlag, &printVersion, 0, "print copyright and version info" }, { "-h", argFlag, &printHelp, 0, "print usage information" }, { "-help", argFlag, &printHelp, 0, "print usage information" }, { "--help", argFlag, &printHelp, 0, "print usage information" }, { "-?", argFlag, &printHelp, 0, "print usage information" }, {} }; int main(int argc, char *argv[]) { std::unique_ptr doc; GooString *fileName; const UnicodeMap *uMap; std::optional ownerPW, userPW; char uBuf[8]; bool ok; bool hasSaveFile; std::vector> embeddedFiles; int nFiles, nPages, n, i, j; Page *page; Annots *annots; const GooString *s1; Unicode u; bool isUnicode; Win32Console win32Console(&argc, &argv); // parse args ok = parseArgs(argDesc, &argc, argv); hasSaveFile = strlen(saveFile) > 0; if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) { ok = false; } if (!ok || argc != 2 || printVersion || printHelp) { fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION); fprintf(stderr, "%s\n", popplerCopyright); fprintf(stderr, "%s\n", xpdfCopyright); if (!printVersion) { printUsage("pdfdetach", "", argDesc); } return 99; } fileName = new GooString(argv[1]); // read config file globalParams = std::make_unique(); if (textEncName[0]) { globalParams->setTextEncoding(textEncName); } // get mapping to output encoding if (!(uMap = globalParams->getTextEncoding())) { error(errConfig, -1, "Couldn't get text encoding"); delete fileName; return 99; } // open PDF file if (ownerPassword[0] != '\001') { ownerPW = GooString(ownerPassword); } if (userPassword[0] != '\001') { userPW = GooString(userPassword); } doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); if (!doc->isOk()) { return 1; } for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i) { embeddedFiles.push_back(doc->getCatalog()->embeddedFile(i)); } nPages = doc->getCatalog()->getNumPages(); for (i = 0; i < nPages; ++i) { page = doc->getCatalog()->getPage(i + 1); if (!page) { continue; } annots = page->getAnnots(); if (!annots) { break; } for (Annot *annot : annots->getAnnots()) { if (annot->getType() != Annot::typeFileAttachment) { continue; } embeddedFiles.push_back(std::make_unique(static_cast(annot)->getFile())); } } nFiles = embeddedFiles.size(); // list embedded files if (doList) { printf("%d embedded files\n", nFiles); for (i = 0; i < nFiles; ++i) { const std::unique_ptr &fileSpec = embeddedFiles[i]; printf("%d: ", i + 1); s1 = fileSpec->getFileName(); if (!s1) { return 3; } if (hasUnicodeByteOrderMark(s1->toStr())) { isUnicode = true; j = 2; } else { isUnicode = false; j = 0; } while (j < s1->getLength()) { if (isUnicode) { u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); j += 2; } else { u = pdfDocEncoding[s1->getChar(j) & 0xff]; ++j; } n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); fwrite(uBuf, 1, n, stdout); } fputc('\n', stdout); } // save all embedded files } else if (saveAll) { std::filesystem::path basePath = savePath; if (basePath.empty()) { basePath = std::filesystem::current_path(); } basePath = basePath.lexically_normal(); for (i = 0; i < nFiles; ++i) { const std::unique_ptr &fileSpec = embeddedFiles[i]; std::string filename; s1 = fileSpec->getFileName(); if (!s1) { return 3; } if (hasUnicodeByteOrderMark(s1->toStr())) { isUnicode = true; j = 2; } else { isUnicode = false; j = 0; } while (j < s1->getLength()) { if (isUnicode) { u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); j += 2; } else { u = pdfDocEncoding[s1->getChar(j) & 0xff]; ++j; } n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); filename.append(uBuf, n); } if (filename.empty()) { return 3; } std::filesystem::path filePath = basePath; filePath = filePath.append(filename).lexically_normal(); if (!filePath.generic_string().starts_with(basePath.generic_string())) { error(errIO, -1, "Preventing directory traversal"); return 3; } auto *embFile = fileSpec->getEmbeddedFile(); if (!embFile || !embFile->isOk()) { return 3; } if (!embFile->save(filePath.generic_string())) { error(errIO, -1, "Error saving embedded file as '{0:s}'", filePath.c_str()); return 2; } } // save an embedded file } else { if (hasSaveFile) { for (i = 0; i < nFiles; ++i) { const std::unique_ptr &fileSpec = embeddedFiles[i]; s1 = fileSpec->getFileName(); if (strcmp(s1->c_str(), saveFile) == 0) { saveNum = i + 1; break; } } } if (saveNum < 1 || saveNum > nFiles) { error(errCommandLine, -1, hasSaveFile ? "Invalid file name" : "Invalid file number"); return 99; } const std::unique_ptr &fileSpec = embeddedFiles[saveNum - 1]; std::string targetPath = savePath; if (targetPath.empty()) { // The user hasn't given a path to save, just use the filename specified in the pdf as name s1 = fileSpec->getFileName(); if (!s1) { return 3; } if (hasUnicodeByteOrderMark(s1->toStr())) { isUnicode = true; j = 2; } else { isUnicode = false; j = 0; } while (j < s1->getLength()) { if (isUnicode) { u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); j += 2; } else { u = pdfDocEncoding[s1->getChar(j) & 0xff]; ++j; } n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); targetPath.append(uBuf, n); } const std::filesystem::path basePath = std::filesystem::current_path().lexically_normal(); std::filesystem::path filePath = basePath; filePath = filePath.append(targetPath).lexically_normal(); if (!filePath.generic_string().starts_with(basePath.generic_string())) { error(errIO, -1, "Preventing directory traversal"); return 3; } targetPath = filePath.generic_string(); } auto *embFile = fileSpec->getEmbeddedFile(); if (!embFile || !embFile->isOk()) { return 3; } if (!embFile->save(targetPath)) { error(errIO, -1, "Error saving embedded file as '{0:s}'", targetPath.c_str()); return 2; } } return 0; }