BaseXlsImporter.java

package org.dynamoframework.importer.impl;

/*-
 * #%L
 * Dynamo Framework
 * %%
 * Copyright (C) 2014 - 2024 Open Circle Solutions
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.github.pjfanning.xlsx.StreamingReader;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.dynamoframework.exception.OCSImportException;
import org.dynamoframework.exception.OCSRuntimeException;
import org.dynamoframework.importer.ImportField;
import org.dynamoframework.importer.dto.AbstractDTO;
import org.dynamoframework.utils.ClassUtils;
import org.dynamoframework.configuration.DynamoPropertiesHolder;
import org.springframework.beans.BeanUtils;

import java.beans.PropertyDescriptor;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;

import static java.lang.String.format;

/**
 * Base class for services that can be used to import Excel files.
 *
 * @author bas.rutten
 */
public class BaseXlsImporter extends BaseImporter<Row, Cell> {

	private static final int CACHE_SIZE = 500;

	/**
	 * Checks if any cell in a row contains a certain (String) value
	 *
	 * @param row   the row to check
	 * @param value the String value to check for
	 * @return true if this is the case, false otherwise
	 */
	protected boolean containsStringValue(Row row, String value) {
		if (row == null || !row.iterator().hasNext()) {
			return false;
		}

		boolean found = false;
		for (int i = row.getFirstCellNum(); !found && i < row.getLastCellNum(); i++) {
			if (row.getCell(i) != null) {
				try {
					found = value.equalsIgnoreCase(row.getCell(i).getStringCellValue());
				} catch (Exception ex) {
					// exception when there is not a String in the cell - nothing can be done about
					// this
				}
			}
		}
		return found;
	}

	@Override
	public int countRows(byte[] bytes, int sheetIndex) {
		int count = 0;
		try (Workbook wb = createReader(bytes, CACHE_SIZE)) {
			for (Row r : wb.getSheetAt(sheetIndex)) {
				// if a row in the middle of the sheet is empty, we assume
				// everything else is empty
				if (isRowEmpty(r)) {
					break;
				}
				count++;
			}
			return count;
		} catch (IOException ex) {
			throw new OCSRuntimeException(ex.getMessage());
		}
	}

	/**
	 * Creates a reader for processing an Excel file using streaming
	 *
	 * @param bytes     the content of the file
	 * @param cacheSize the cache size
	 * @return the reader
	 */
	public Workbook createReader(byte[] bytes, int cacheSize) {
		return StreamingReader.builder().rowCacheSize(cacheSize).open(new ByteArrayInputStream(bytes));
	}

	/**
	 * Creates a workbook from an array of bytes
	 *
	 * @param bytes the byte content of the file
	 * @return the workbook
	 */
	public Workbook createWorkbook(byte[] bytes) {
		Workbook workbook;
		try {
			// first, try to check if it's an old Excel file
			workbook = new HSSFWorkbook(new ByteArrayInputStream(bytes));
		} catch (OfficeXmlFileException ex) {
			try {
				workbook = new XSSFWorkbook(new ByteArrayInputStream(bytes));
			} catch (IOException e) {
				throw new OCSRuntimeException(e.getMessage(), e);
			}
		} catch (IOException e) {
			throw new OCSRuntimeException(e.getMessage(), e);
		}
		return workbook;
	}

	/**
	 * Extracts a Boolean value from a cell
	 *
	 * @param cell the cell to extract the value from
	 * @return the value
	 */
	protected Boolean getBooleanValue(Cell cell) {
		if (cell != null && (CellType.BOOLEAN == cell.getCellType())) {
			return cell.getBooleanCellValue();
		} else if (cell != null && CellType.STRING == cell.getCellType()) {
			return Boolean.valueOf(cell.getStringCellValue());
		} else if (cell != null && CellType.FORMULA == cell.getCellType()) {
			return cell.getBooleanCellValue();
		}
		return Boolean.FALSE;
	}

	/**
	 * {@inheritDoc}
	 */
	@Override
	protected Boolean getBooleanValueWithDefault(Cell unit, ImportField field) {
		Boolean b = getBooleanValue(unit);
		if (b == null && field.defaultValue() != null) {
			return Boolean.valueOf(field.defaultValue());
		}
		return b;
	}

	/**
	 * Retrieves a date value from a cell
	 *
	 * @param cell the cell to extract the value from
	 * @return the value
	 */
	protected LocalDate getDateValue(Cell cell) {
		if (cell != null && (CellType.NUMERIC == cell.getCellType() || CellType.BLANK == cell.getCellType())) {
			try {
				return LocalDate.from(cell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()));
			} catch (NullPointerException nex) {
				// horrible code throws NPE when cell is empty
				return null;
			}
		} else if (cell != null && CellType.FORMULA == cell.getCellType()) {
			try {
				return LocalDate.from(cell.getDateCellValue().toInstant().atZone(ZoneId.systemDefault()));
			} catch (NullPointerException nex) {
				// horrible code throws NPE when cell is empty
				return null;
			}
		}
		return null;
	}

	/**
	 * {@inheritDoc}
	 */
	@Override
	protected LocalDate getDateValueWithDefault(Cell cell, ImportField field) {
		LocalDate value = getDateValue(cell);
		if (value == null && !StringUtils.isEmpty(field.defaultValue())) {
			value = LocalDate.parse(field.defaultValue(),
				DateTimeFormatter.ofPattern(DynamoPropertiesHolder.getDynamoProperties().getDefaults().getDateFormat()));
		}
		return value;
	}

	/**
	 * Retrieves a numeric value for a cell
	 *
	 * @param cell the cell to retrieve the value from
	 * @return the value
	 */
	protected Double getNumericValue(Cell cell) {
		if (cell != null && (CellType.NUMERIC == cell.getCellType() || CellType.BLANK == cell.getCellType())) {
			try {
				return cell.getNumericCellValue();
			} catch (NullPointerException nex) {
				// cannot return null from getNumericCellValue - so if the cell
				// is empty we
				// have to handle it in this ugly way
				return null;
			} catch (Exception ex) {
				throw new OCSImportException(format("Found an invalid numeric value: %s", cell.getStringCellValue()),
					ex);
			}
		} else if (cell != null && CellType.STRING == cell.getCellType()) {
			// in case the value is not numeric, simply output a warning. If the
			// field is required, this will trigger
			// an error at a later stage
			if (!StringUtils.isEmpty(cell.getStringCellValue().trim())) {
				throw new OCSImportException(format("Found an invalid numeric value: %s", cell.getStringCellValue()));
			}
		} else if (cell != null && CellType.FORMULA == cell.getCellType()) {
			try {
				return cell.getNumericCellValue();
			} catch (NullPointerException nex) {
				return null;
			} catch (NumberFormatException ex) {
				throw new OCSImportException(format("Found an invalid numeric value: %s", cell.getStringCellValue()));
			}
		}
		return null;
	}

	/**
	 * Retrieves the numeric value of a cell, or falls back to a suitable default
	 * value if the cell is empty and a default value has been specified
	 *
	 * @param cell  the cell to extract the value from
	 * @param field the field definition
	 * @return the value
	 */
	@Override
	public Double getNumericValueWithDefault(Cell cell, ImportField field) {
		Double value = getNumericValue(cell);
		if (value == null && !StringUtils.isEmpty(field.defaultValue())) {
			value = Double.valueOf(field.defaultValue());
		}
		return value;
	}

	/**
	 * Retrieves the value of a cell as a string. Returns <code>null</code> if the
	 * cell does not contain a string
	 *
	 * @param cell the cell to extract the value from
	 * @return the value
	 */
	protected String getStringValue(Cell cell) {
		if (cell != null && (CellType.STRING == cell.getCellType() || cell.getCellType() == CellType.BLANK)) {
			String value = cell.getStringCellValue();
			return value == null ? null : value.trim();
		} else if (cell != null && CellType.NUMERIC == cell.getCellType()) {
			// if a number is entered in a field that is supposed to contain a
			// string, Excel goes insane. We have to compensate for this
			double d = cell.getNumericCellValue();
			return Double.toString(d);
		} else if (cell != null && CellType.FORMULA == cell.getCellType()) {
			try {
				return cell.getStringCellValue();
			} catch (Exception ex) {
				throw new OCSImportException("Found an invalid string value", ex);
			}
		}
		return null;
	}

	/**
	 * Retrieves the value of a cell as a String, or falls back to a default value
	 * if the value is empty and a suitable default value is defined
	 *
	 * @param cell  the cell to extract the value from
	 * @param field the field definition
	 * @return the value
	 */
	@Override
	protected String getStringValueWithDefault(Cell cell, ImportField field) {
		String value = getStringValue(cell);
		if (StringUtils.isEmpty(value) && !StringUtils.isEmpty(field.defaultValue())) {
			value = field.defaultValue();
		}
		return value;
	}

	@Override
	protected Cell getUnit(Row row, ImportField field) {
		return row.getCell(row.getFirstCellNum() + field.index());
	}

	@Override
	public boolean isPercentageCorrectionSupported() {
		return true;
	}

	/**
	 * Check if the specified row is completely empty
	 *
	 * @param row the row to check
	 * @return true if ths is the case, false otherwise
	 */
	public boolean isRowEmpty(Row row) {
		if (row == null || row.getFirstCellNum() < 0) {
			return true;
		}

		for (Cell next : row) {
			String value = next.getStringCellValue();
			if (!StringUtils.isEmpty(value)) {
				return false;
			}
		}

		return true;
	}

	/**
	 * {@inheritDoc}
	 */
	@Override
	protected boolean isWithinRange(Row row, ImportField field) {
		return row.getFirstCellNum() + field.index() < row.getLastCellNum();
	}

	/**
	 * Processes a number of consecutive rows and translates them into a DTO
	 *
	 * @param sheet         the sheet to read the values from
	 * @param firstRowIndex the index of the first row to start reading from
	 * @param colIndex      the index of the column that contains the values
	 * @param clazz         the class
	 * @return the resulting DTO
	 */
	public <T extends AbstractDTO> T processRows(Sheet sheet, int firstRowIndex, int colIndex, Class<T> clazz) {
		T dto = ClassUtils.instantiateClass(clazz);

		Object firstCellValue = null;
		try {
			firstCellValue = sheet.getRow(firstRowIndex).getCell(colIndex).getStringCellValue();
		} catch (Exception ex) {
			// do nothing - not a String value
		}

		if (firstCellValue != null && !"".equals(firstCellValue.toString())) {
			PropertyDescriptor[] descriptors = BeanUtils.getPropertyDescriptors(clazz);
			for (PropertyDescriptor d : descriptors) {
				ImportField field = ClassUtils.getAnnotation(clazz, d.getName(), ImportField.class);
				if (field != null) {
					int rowNum = firstRowIndex + field.index();
					if (rowNum <= sheet.getLastRowNum()) {
						Row row = sheet.getRow(rowNum);
						Cell unit = row.getCell(colIndex);
						Object obj = getFieldValue(d, unit, field);
						if (obj != null) {
							ClassUtils.setFieldValue(dto, d.getName(), obj);
						} else if (field.required()) {
							// a required value is missing!
							throw new OCSImportException(
								format("Required value for field '%s' is missing", d.getName()));
						}
					} else {
						throw new OCSImportException(
							format("Input doesn't have enough rows: row %d does not exist", rowNum));
					}
				}
			}
		}
		return dto;
	}
}