200万做网站,宣传推广方案模板,网站准确的定位,06627网页制作与网站建设1. 简介 我们在前面的文章中提到了calcite支持csv和json文件的数据源适配, 其实就是将文件解析成表然后以文件夹为schema, 然后将生成的schema注册到RootSehema(RootSchema是所有数据源schema的parent#xff0c;多个不同数据源schema可以挂在同一个RootSchema下)下, 最终使用…1. 简介 我们在前面的文章中提到了calcite支持csv和json文件的数据源适配, 其实就是将文件解析成表然后以文件夹为schema, 然后将生成的schema注册到RootSehema(RootSchema是所有数据源schema的parent多个不同数据源schema可以挂在同一个RootSchema下)下, 最终使用calcite的特性进行sql的解析查询返回. 但其实我们的数据文件一般使用excel进行存储,流转, 但很可惜, calcite本身没有excel的适配器, 但其实我们可以模仿calcite-file, 自己搞一个calcite-file-excel, 也可以熟悉calcite的工作原理. 2. 实现思路 因为excel有sheet的概念, 所以可以将一个excel解析成schema, 每个sheet解析成table, 实现步骤如下: 实现SchemaFactory重写create方法: schema工厂 用于创建schema 继承AbstractSchema: schema描述类 用于解析excel, 创建table(解析sheet) 继承AbstractTable, ScannableTable: table描述类 提供字段信息和数据内容等(解析sheet data) 3. Excel样例 excel有两个sheet页, 分别是user_info 和 role_info如下: ok, 万事具备. 4. Maven dependencygroupIdorg.apache.poi/groupIdartifactIdpoi-ooxml/artifactIdversion5.2.3/version
/dependencydependencygroupIdorg.apache.poi/groupIdartifactIdpoi/artifactIdversion5.2.3/version
/dependencydependencygroupIdorg.apache.calcite/groupIdartifactIdcalcite-core/artifactIdversion1.37.0/version
/dependency 5. 核心代码 5.1 SchemaFactory package com.ldx.calcite.excel;import com.google.common.collect.Lists;
import org.apache.calcite.schema.Schema;
import org.apache.calcite.schema.SchemaFactory;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;import java.io.File;
import java.util.List;
import java.util.Map;/*** schema factory*/
public class ExcelSchemaFactory implements SchemaFactory {public final static ExcelSchemaFactory INSTANCE new ExcelSchemaFactory();private ExcelSchemaFactory(){}Overridepublic Schema create(SchemaPlus parentSchema, String name, MapString, Object operand) {final Object filePath operand.get(filePath);if (ObjectUtils.isEmpty(filePath)) {throw new NullPointerException(can not find excel file);}return this.create(filePath.toString());}public Schema create(String excelFilePath) {if (StringUtils.isBlank(excelFilePath)) {throw new NullPointerException(can not find excel file);}return this.create(new File(excelFilePath));}public Schema create(File excelFile) {if (ObjectUtils.isEmpty(excelFile) || !excelFile.exists()) {throw new NullPointerException(can not find excel file);}if (!excelFile.isFile() || !isExcelFile(excelFile)) {throw new RuntimeException(can not find excel file: excelFile.getAbsolutePath());}return new ExcelSchema(excelFile);}protected ListString supportedFileSuffix() {return Lists.newArrayList(xls, xlsx);}private boolean isExcelFile(File excelFile) {if (ObjectUtils.isEmpty(excelFile)) {return false;}final String name excelFile.getName();return StringUtils.endsWithAny(name, this.supportedFileSuffix().toArray(new String[0]));}
} schema中有多个重载的create方法用于方便的创建schema, 最终将excel file 交给ExcelSchema创建一个schema对象 5.2 Schema package com.ldx.calcite.excel;import org.apache.calcite.schema.Table;
import org.apache.calcite.schema.impl.AbstractSchema;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.testng.collections.Maps;import java.io.File;
import java.util.Iterator;
import java.util.Map;/*** schema*/
public class ExcelSchema extends AbstractSchema {private final File excelFile;private MapString, Table tableMap;public ExcelSchema(File excelFile) {this.excelFile excelFile;}Overrideprotected MapString, Table getTableMap() {if (ObjectUtils.isEmpty(tableMap)) {tableMap createTableMap();}return tableMap;}private MapString, Table createTableMap() {final MapString, Table result Maps.newHashMap();try (Workbook workbook WorkbookFactory.create(excelFile)) {final IteratorSheet sheetIterator workbook.sheetIterator();while (sheetIterator.hasNext()) {final Sheet sheet sheetIterator.next();final ExcelScannableTable excelScannableTable new ExcelScannableTable(sheet, null);result.put(sheet.getSheetName(), excelScannableTable);}}catch (Exception ignored) {}return result;}
} schema类读取Excel file, 并循环读取sheet, 将每个sheet解析成ExcelScannableTable并存储 5.3 Table package com.ldx.calcite.excel;import com.google.common.collect.Lists;
import com.ldx.calcite.excel.enums.JavaFileTypeEnum;
import org.apache.calcite.DataContext;
import org.apache.calcite.adapter.java.JavaTypeFactory;
import org.apache.calcite.linq4j.Enumerable;
import org.apache.calcite.linq4j.Linq4j;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelProtoDataType;
import org.apache.calcite.schema.ScannableTable;
import org.apache.calcite.schema.impl.AbstractTable;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Pair;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.checkerframework.checker.nullness.qual.Nullable;import java.util.List;/*** table*/
public class ExcelScannableTable extends AbstractTable implements ScannableTable {private final RelProtoDataType protoRowType;private final Sheet sheet;private RelDataType rowType;private ListJavaFileTypeEnum fieldTypes;private ListObject[] rowDataList;public ExcelScannableTable(Sheet sheet, RelProtoDataType protoRowType) {this.protoRowType protoRowType;this.sheet sheet;}Overridepublic EnumerableNullable Object[] scan(DataContext root) {JavaTypeFactory typeFactory root.getTypeFactory();final ListJavaFileTypeEnum fieldTypes this.getFieldTypes(typeFactory);if (rowDataList null) {rowDataList readExcelData(sheet, fieldTypes);}return Linq4j.asEnumerable(rowDataList);}Overridepublic RelDataType getRowType(RelDataTypeFactory typeFactory) {if (ObjectUtils.isNotEmpty(protoRowType)) {return protoRowType.apply(typeFactory);}if (ObjectUtils.isEmpty(rowType)) {rowType deduceRowType((JavaTypeFactory) typeFactory, sheet, null);}return rowType;}public ListJavaFileTypeEnum getFieldTypes(RelDataTypeFactory typeFactory) {if (fieldTypes null) {fieldTypes Lists.newArrayList();deduceRowType((JavaTypeFactory) typeFactory, sheet, fieldTypes);}return fieldTypes;}private ListObject[] readExcelData(Sheet sheet, ListJavaFileTypeEnum fieldTypes) {ListObject[] rowDataList Lists.newArrayList();for (int rowIndex 1; rowIndex sheet.getLastRowNum(); rowIndex) {Row row sheet.getRow(rowIndex);Object[] rowData new Object[fieldTypes.size()];for (int i 0; i row.getLastCellNum(); i) {final JavaFileTypeEnum javaFileTypeEnum fieldTypes.get(i);Cell cell row.getCell(i, Row.MissingCellPolicy.CREATE_NULL_AS_BLANK);final Object cellValue javaFileTypeEnum.getCellValue(cell);rowData[i] cellValue;}rowDataList.add(rowData);}return rowDataList;}public static RelDataType deduceRowType(JavaTypeFactory typeFactory, Sheet sheet, ListJavaFileTypeEnum fieldTypes) {final ListString names Lists.newArrayList();final ListRelDataType types Lists.newArrayList();if (sheet ! null) {Row headerRow sheet.getRow(0);if (headerRow ! null) {for (int i 0; i headerRow.getLastCellNum(); i) {Cell cell headerRow.getCell(i, Row.MissingCellPolicy.CREATE_NULL_AS_BLANK);String[] columnInfo cell.getStringCellValue().split(:);String columnName columnInfo[0].trim();String columnType null;if (columnInfo.length 2) {columnType columnInfo[1].trim();}final JavaFileTypeEnum javaFileType JavaFileTypeEnum.of(columnType).orElse(JavaFileTypeEnum.UNKNOWN);final RelDataType sqlType typeFactory.createSqlType(javaFileType.getSqlTypeName());names.add(columnName);types.add(sqlType);if (fieldTypes ! null) {fieldTypes.add(javaFileType);}}}}if (names.isEmpty()) {names.add(line);types.add(typeFactory.createSqlType(SqlTypeName.VARCHAR));}return typeFactory.createStructType(Pair.zip(names, types));}
}table类中其中有两个比较关键的方法 scan: 扫描表内容, 我们这里将sheet页面的数据内容解析存储最后交给calcite getRowType: 获取字段信息, 我们这里默认使用第一条记录作为表头(row[0]) 并解析为字段信息, 字段规则跟csv一样 name:string, 冒号前面的是字段key, 冒号后面的是字段类型, 如果未指定字段类型, 则解析为UNKNOWN, 后续JavaFileTypeEnum会进行类型推断, 最终在结果处理时calcite也会进行推断 deduceRowType: 推断字段类型, 方法中使用JavaFileTypeEnum枚举类对java type sql type 字段值转化处理方法 进行管理 5.4 ColumnTypeEnum package com.ldx.calcite.excel.enums;import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.calcite.avatica.util.DateTimeUtils;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.util.CellUtil;import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.Optional;
import java.util.TimeZone;
import java.util.function.Function;/*** type converter*/
Slf4j
Getter
public enum JavaFileTypeEnum {STRING(string, SqlTypeName.VARCHAR, Cell::getStringCellValue),BOOLEAN(boolean, SqlTypeName.BOOLEAN, Cell::getBooleanCellValue),BYTE(byte, SqlTypeName.TINYINT, Cell::getStringCellValue),CHAR(char, SqlTypeName.CHAR, Cell::getStringCellValue),SHORT(short, SqlTypeName.SMALLINT, Cell::getNumericCellValue),INT(int, SqlTypeName.INTEGER, cell - (Double.valueOf(cell.getNumericCellValue()).intValue())),LONG(long, SqlTypeName.BIGINT, cell - (Double.valueOf(cell.getNumericCellValue()).longValue())),FLOAT(float, SqlTypeName.REAL, Cell::getNumericCellValue),DOUBLE(double, SqlTypeName.DOUBLE, Cell::getNumericCellValue),DATE(date, SqlTypeName.DATE, getValueWithDate()),TIMESTAMP(timestamp, SqlTypeName.TIMESTAMP, getValueWithTimestamp()),TIME(time, SqlTypeName.TIME, getValueWithTime()),UNKNOWN(unknown, SqlTypeName.UNKNOWN, getValueWithUnknown()),;// cell typeprivate final String typeName;// sql typeprivate final SqlTypeName sqlTypeName;// value convert funcprivate final FunctionCell, Object cellValueFunc;private static final FastDateFormat TIME_FORMAT_DATE;private static final FastDateFormat TIME_FORMAT_TIME;private static final FastDateFormat TIME_FORMAT_TIMESTAMP;static {final TimeZone gmt TimeZone.getTimeZone(GMT);TIME_FORMAT_DATE FastDateFormat.getInstance(yyyy-MM-dd, gmt);TIME_FORMAT_TIME FastDateFormat.getInstance(HH:mm:ss, gmt);TIME_FORMAT_TIMESTAMP FastDateFormat.getInstance(yyyy-MM-dd HH:mm:ss, gmt);}JavaFileTypeEnum(String typeName, SqlTypeName sqlTypeName, FunctionCell, Object cellValueFunc) {this.typeName typeName;this.sqlTypeName sqlTypeName;this.cellValueFunc cellValueFunc;}public static OptionalJavaFileTypeEnum of(String typeName) {return Arrays.stream(values()).filter(type - StringUtils.equalsIgnoreCase(typeName, type.getTypeName())).findFirst();}public static SqlTypeName findSqlTypeName(String typeName) {final OptionalJavaFileTypeEnum javaFileTypeOptional of(typeName);if (javaFileTypeOptional.isPresent()) {return javaFileTypeOptional.get().getSqlTypeName();}return SqlTypeName.UNKNOWN;}public Object getCellValue(Cell cell) {return cellValueFunc.apply(cell);}public static FunctionCell, Object getValueWithUnknown() {return cell - {if (ObjectUtils.isEmpty(cell)) {return null;}switch (cell.getCellType()) {case STRING:return cell.getStringCellValue();case NUMERIC:if (DateUtil.isCellDateFormatted(cell)) {// 如果是日期类型返回日期对象return cell.getDateCellValue();}else {// 否则返回数值return cell.getNumericCellValue();}case BOOLEAN:return cell.getBooleanCellValue();case FORMULA:// 对于公式单元格先计算公式结果再获取其值try {return cell.getNumericCellValue();}catch (Exception e) {try {return cell.getStringCellValue();}catch (Exception ex) {log.error(parse unknown data error, cellRowIndex:{}, cellColumnIndex:{}, cell.getRowIndex(), cell.getColumnIndex(), e);return null;}}case BLANK:return ;default:return null;}};}public static FunctionCell, Object getValueWithDate() {return cell - {Date date cell.getDateCellValue();if(ObjectUtils.isEmpty(date)) {return null;}try {final String formated new SimpleDateFormat(yyyy-MM-dd).format(date);Date newDate TIME_FORMAT_DATE.parse(formated);return (int) (newDate.getTime() / DateTimeUtils.MILLIS_PER_DAY);}catch (ParseException e) {log.error(parse date error, date:{}, date, e);}return null;};}public static FunctionCell, Object getValueWithTimestamp() {return cell - {Date date cell.getDateCellValue();if(ObjectUtils.isEmpty(date)) {return null;}try {final String formated new SimpleDateFormat(yyyy-MM-dd HH:mm:ss).format(date);Date newDate TIME_FORMAT_TIMESTAMP.parse(formated);return (int) newDate.getTime();}catch (ParseException e) {log.error(parse timestamp error, date:{}, date, e);}return null;};}public static FunctionCell, Object getValueWithTime() {return cell - {Date date cell.getDateCellValue();if(ObjectUtils.isEmpty(date)) {return null;}try {final String formated new SimpleDateFormat(HH:mm:ss).format(date);Date newDate TIME_FORMAT_TIME.parse(formated);return newDate.getTime();}catch (ParseException e) {log.error(parse time error, date:{}, date, e);}return null;};}
} 该枚举类主要管理了java type sql type cell value convert func, 方便统一管理类型映射及单元格内容提取时的转换方法(这里借用了java8 function函数特性) 注: 这里的日期转换只能这样写, 即使用GMT的时区(抄的calcite-file), 要不然输出的日期时间一直有时差... 6. 测试查询 package com.ldx.calcite;import com.ldx.calcite.excel.ExcelSchemaFactory;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.calcite.config.CalciteConnectionProperty;
import org.apache.calcite.jdbc.CalciteConnection;
import org.apache.calcite.schema.Schema;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.util.Sources;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.testng.collections.Maps;import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;
import java.util.Properties;Slf4j
public class CalciteExcelTest {private static Connection connection;private static SchemaPlus rootSchema;private static CalciteConnection calciteConnection;BeforeAllSneakyThrowspublic static void beforeAll() {Properties info new Properties();// 不区分sql大小写info.setProperty(CalciteConnectionProperty.CASE_SENSITIVE.camelName(), false);// 创建Calcite连接connection DriverManager.getConnection(jdbc:calcite:, info);calciteConnection connection.unwrap(CalciteConnection.class);// 构建RootSchema在Calcite中RootSchema是所有数据源schema的parent多个不同数据源schema可以挂在同一个RootSchema下rootSchema calciteConnection.getRootSchema();}TestSneakyThrowspublic void test_execute_query() {final Schema schema ExcelSchemaFactory.INSTANCE.create(resourcePath(file/test.xlsx));rootSchema.add(test, schema);// 设置默认的schemacalciteConnection.setSchema(test);final Statement statement calciteConnection.createStatement();ResultSet resultSet statement.executeQuery(SELECT * FROM user_info);printResultSet(resultSet);System.out.println();ResultSet resultSet2 statement.executeQuery(SELECT * FROM test.user_info where id 110 and birthday 2003-01-01);printResultSet(resultSet2);System.out.println();ResultSet resultSet3 statement.executeQuery(SELECT * FROM test.user_info ui inner join test.role_info ri on ui.role_id ri.id);printResultSet(resultSet3);}AfterAllSneakyThrowspublic static void closeResource() {connection.close();}private static String resourcePath(String path) {final URL url CalciteExcelTest.class.getResource(/ path);return Sources.of(url).file().getAbsolutePath();}public static void printResultSet(ResultSet resultSet) throws SQLException {// 获取 ResultSet 元数据ResultSetMetaData metaData resultSet.getMetaData();// 获取列数int columnCount metaData.getColumnCount();log.info(Number of columns: {},columnCount);// 遍历 ResultSet 并打印结果while (resultSet.next()) {final MapString, String item Maps.newHashMap();// 遍历每一列并打印for (int i 1; i columnCount; i) {String columnName metaData.getColumnName(i);String columnValue resultSet.getString(i);item.put(columnName, columnValue);}log.info(item.toString());}}
} 测试结果如下: 文章转载自张铁牛 原文链接4. 使用sql查询excel内容 - 张铁牛 - 博客园 体验地址引迈 - JNPF快速开发平台_低代码开发平台_零代码开发平台_流程设计器_表单引擎_工作流引擎_软件架构