Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create preview content for tar files #759

Merged
merged 11 commits into from
Oct 17, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.sql.SQLException;
import java.util.ArrayList;
Expand All @@ -33,8 +34,8 @@
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.rest.Parameter;
Expand Down Expand Up @@ -82,7 +83,10 @@
@Component(MetadataBitstreamWrapperRest.CATEGORY + "." + MetadataBitstreamWrapperRest.NAME)
public class MetadataBitstreamRestRepository extends DSpaceRestRepository<MetadataBitstreamWrapperRest, Integer> {
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(MetadataBitstreamRestRepository.class);

// Temp-file suffixes and archive-type identifiers. These are true constants, so they are declared
// static: one copy for the class instead of one per repository instance.
private static final String FILE_EXTENSION_ZIP = ".zip";
private static final String FILE_EXTENSION_TAR = ".tar";
private static final String ARCHIVE_TYPE_ZIP = "zip";
private static final String ARCHIVE_TYPE_TAR = "tar";
// This constant is used to limit the length of the preview content stored in the database to prevent
// the database from being overloaded with large amounts of data.
private static final int MAX_PREVIEW_COUNT_LENGTH = 2000;
Expand Down Expand Up @@ -329,17 +333,19 @@ private List<FileInfo> processInputStreamToFilePreview(Context context, Bitstrea
} else {
String data = "";
if (bitstream.getFormat(context).getMIMEType().equals("application/zip")) {
data = extractFile(inputStream, "zip");
data = extractFile(inputStream, ARCHIVE_TYPE_ZIP);
try {
fileInfos = FileTreeViewGenerator.parse(data);
} catch (Exception e) {
log.error("Cannot extract file content because: {}", e.getMessage());
}
} else if (bitstream.getFormat(context).getMIMEType().equals("application/x-tar")) {
ArchiveInputStream is = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.TAR,
inputStream);
data = extractFile(is, "tar");
fileInfos = FileTreeViewGenerator.parse(data);
data = extractFile(inputStream, ARCHIVE_TYPE_TAR);
try {
fileInfos = FileTreeViewGenerator.parse(data);
} catch (Exception e) {
log.error("Cannot extract file content because: {}", e.getMessage());
}
}
}
return fileInfos;
Expand Down Expand Up @@ -382,76 +388,107 @@ private String composePreviewURL(Context context, Item item, Bitstream bitstream
return url;
}


/**
* Convert ZIP file into structured String.
* @param inputStream Input stream with ZIP content
* @param fileType ZIP/TAR
* @return structured String
* Creates a temporary file with the appropriate extension based on the specified file type.
* @param fileType the type of file for which to create a temporary file
* @return a Path object representing the temporary file
* @throws IOException if an I/O error occurs while creating the file
*/
public String extractFile(InputStream inputStream, String fileType) {
List<String> filePaths = new ArrayList<>();
Path tempFile = null;
FileSystem zipFileSystem = null;
private Path createTempFile(String fileType) throws IOException {
    // fileType is the archive type ("tar"/"zip"), not the extension (".tar"/".zip"). Comparing it only
    // against the extension constant never matched "tar", so TAR content was always staged as ".zip".
    // The extension form is still accepted so any caller that passed ".tar" keeps working.
    boolean isTar = ARCHIVE_TYPE_TAR.equals(fileType) || FILE_EXTENSION_TAR.equals(fileType);
    String extension = isTar ? FILE_EXTENSION_TAR : FILE_EXTENSION_ZIP;
    return Files.createTempFile("temp", extension);
}

try {
switch (fileType) {
case "tar":
tempFile = Files.createTempFile("temp", ".tar");
break;
default:
tempFile = Files.createTempFile("temp", ".zip");
/**
 * Adds one archive entry to the list in the {@code name|size} form.
 * The name refers to an entry inside an archive, not to a file on disk, so the host file system
 * is deliberately not consulted: a directory entry is recognised by the trailing "/" the caller
 * supplies. An empty name (the archive root) is listed as "/".
 * @param filePaths the list of file paths to add to
 * @param path      the entry name; directory names are expected to end with "/"
 * @param size      the size reported for the entry
 */
private void addFilePath(List<String> filePaths, String path, long size) {
    // Previously Files.isDirectory(Paths.get(path)) was evaluated against the server's working
    // directory, so the result depended on unrelated local files, and Paths.get could reject
    // entry names that are not valid host paths.
    String name = path.isEmpty() ? "/" : path;
    filePaths.add(name + "|" + size);
}

/**
 * Reads the TAR archive stored in the given temporary file and records every entry it contains.
 * Each entry is handed to {@code addFilePath} with the entry's name and size.
 * @param filePaths the list that receives one record per TAR entry
 * @param tempFile  the temporary file holding the TAR content
 * @throws IOException if the file cannot be opened or the TAR stream cannot be read
 */
private void processTarFile(List<String> filePaths, Path tempFile) throws IOException {
    try (InputStream rawStream = Files.newInputStream(tempFile);
         TarArchiveInputStream tarStream = new TarArchiveInputStream(rawStream)) {
        // getNextTarEntry() returns null once the end of the archive is reached.
        for (TarArchiveEntry current = tarStream.getNextTarEntry();
             current != null;
             current = tarStream.getNextTarEntry()) {
            String entryName = current.getName();
            long entrySize = current.getSize();
            addFilePath(filePaths, entryName, entrySize);
        }
    }
}

Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
/**
 * Walks the whole ZIP file system from its root and records every entry it contains.
 * Directory entries are passed on with a trailing "/" so they can be told apart from files; the
 * root itself is passed with an empty name. An entry that cannot be read is logged and skipped
 * so that the remaining entries are still listed.
 * @param filePaths     the list to populate with the extracted file paths
 * @param zipFileSystem the FileSystem object representing the ZIP file
 * @throws IOException if the ZIP file system cannot be walked
 */
private void processZipFile(List<String> filePaths, FileSystem zipFileSystem) throws IOException {
    Path root = zipFileSystem.getPath("/");
    // The stream returned by Files.walk keeps directory handles open until it is closed.
    try (java.util.stream.Stream<Path> entries = Files.walk(root)) {
        entries.forEach(path -> {
            try {
                long fileSize = Files.size(path);
                // Drop the leading "/" of the absolute in-archive path; the root becomes "".
                String name = path.toString().substring(1);
                // The directory test must run against the ZIP path itself, not a host path
                // rebuilt from the name. The root's empty name is passed through unchanged.
                if (Files.isDirectory(path) && !name.isEmpty() && !name.endsWith("/")) {
                    name = name + "/";
                }
                addFilePath(filePaths, name, fileSize);
            } catch (IOException e) {
                log.error("Cannot read ZIP entry {}", path, e);
            }
        });
    }
}

zipFileSystem = FileSystems.newFileSystem(tempFile, (ClassLoader) null);
Path root = zipFileSystem.getPath("/");
Files.walk(root)
.forEach(path -> {
try {
long fileSize = Files.size(path);
if (Files.isDirectory(path)) {
filePaths.add(path.toString().substring(1) + "/|" + fileSize );
} else {
filePaths.add(path.toString().substring(1) + "|" + fileSize );
}
} catch (IOException e) {
e.printStackTrace();
}
});
} catch (IOException e) {
e.printStackTrace();
} finally {
if (zipFileSystem != null) {
try {
zipFileSystem.close();
} catch (IOException e) {
e.printStackTrace();
}
/**
 * Closes the specified FileSystem resource if it is not null.
 * A failure to close is logged and otherwise ignored, so it never masks the result of the
 * extraction that used the file system.
 * @param zipFileSystem the FileSystem to close, may be {@code null}
 */
private void closeFileSystem(FileSystem zipFileSystem) {
    if (zipFileSystem == null) {
        return;
    }
    try {
        zipFileSystem.close();
    } catch (IOException e) {
        // Report through the class logger instead of printing the stack trace to stderr.
        log.error("Cannot close the ZIP file system", e);
    }
}

if (tempFile != null) {
try {
Files.delete(tempFile);
} catch (IOException e) {
e.printStackTrace();
}
/**
 * Deletes the specified temporary file if it is not null.
 * A file that no longer exists is not treated as an error; any other failure is logged and
 * otherwise ignored because the cleanup is best-effort.
 * @param tempFile the Path object representing the temporary file to delete, may be {@code null}
 */
private void deleteTempFile(Path tempFile) {
    if (tempFile == null) {
        return;
    }
    try {
        // deleteIfExists avoids a NoSuchFileException when the file is already gone.
        Files.deleteIfExists(tempFile);
    } catch (IOException e) {
        // Report through the class logger instead of printing the stack trace to stderr.
        log.error("Cannot delete temporary file {}", tempFile, e);
    }
}

/**
* Builds an XML response string based on the provided list of file paths.
* @param filePaths the list of file paths to include in the XML response
* @return an XML string representation of the file paths
*/
private String buildXmlResponse(List<String> filePaths) {
// Is a folder regex
String folderRegex = "/|\\d+";
Pattern pattern = Pattern.compile(folderRegex);

StringBuilder sb = new StringBuilder();
sb.append(("<root>"));
sb.append("<root>");
Iterator<String> iterator = filePaths.iterator();
int fileCounter = 0;
while ((iterator.hasNext() && fileCounter < maxPreviewCount)) {
while (iterator.hasNext() && fileCounter < maxPreviewCount) {
String filePath = iterator.next();

// Check if the file is a folder
Matcher matcher = pattern.matcher(filePath);
if (!matcher.matches()) {
Expand All @@ -464,10 +501,46 @@ public String extractFile(InputStream inputStream, String fileType) {
if (fileCounter > maxPreviewCount) {
sb.append("<element>...too many files...|0</element>");
}
sb.append(("</root>"));
sb.append("</root>");
return sb.toString();
}

/**
 * Lists the entries of an archive supplied as a stream and returns them as an XML string.
 * The stream is first copied to a temporary file, which is then read either as a TAR archive or,
 * for any other file type, as a ZIP file system. The temporary file and the ZIP file system are
 * always released afterwards. If an I/O error occurs it is logged, and the XML is built from the
 * entries collected up to that point.
 * @param inputStream the InputStream containing the archive data
 * @param fileType    the type of archive to read ("tar" or "zip"); anything other than "tar" is
 *                    handled as ZIP
 * @return an XML string representing the extracted file paths
 */
public String extractFile(InputStream inputStream, String fileType) {
    List<String> filePaths = new ArrayList<>();
    Path tempFile = null;
    FileSystem zipFileSystem = null;

    try {
        // Create a temporary file based on the file type
        tempFile = createTempFile(fileType);

        // Copy the input stream to the temporary file
        Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);

        // Process the file based on its type; the shared constant replaces the literal "tar"
        if (ARCHIVE_TYPE_TAR.equals(fileType)) {
            processTarFile(filePaths, tempFile);
        } else {
            zipFileSystem = FileSystems.newFileSystem(tempFile, (ClassLoader) null);
            processZipFile(filePaths, zipFileSystem);
        }
    } catch (IOException e) {
        // Report through the class logger instead of printing the stack trace to stderr.
        log.error("Cannot extract the content of the {} archive", fileType, e);
    } finally {
        closeFileSystem(zipFileSystem);
        deleteTempFile(tempFile);
    }

    return buildXmlResponse(filePaths);
}

/**
* Read input stream and return content as String
* @param inputStream to read
Expand Down
Loading