Parsing SQL query in Java -
i know using prepared statement can set column values. here want is, have list of queries written execute on same table different column values. e.g.
select * tablename t1 t1.tablecolumnid=4 , t1.tablecolumnname='test' inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid select * tablename t1 t1.tablecolumnid=6 , t1.tablecolumnname='test' inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid
as can see both queries same except tablecolumnid
value. want save in collection
select * tablename t1 t1.tablecolumnid=? , t1.tablecolumnname='test' inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid
so won't have duplicate queries (where values not considered).
how can this?
- one approach consist in defining subset of sql grammar sufficient parse queries, write parser grammar,
- compare queries , find parts identical, , differ,
- locate literal values
4
,6
,'test'
in queries, build (flat) syntactic tree, , compare trees 1 identify literal values may differ 1 query another.
update
to parse sql, user parser generator antlr or javacc. antlr , javacc grammars exist sql, , start 1 of them.
that said, think approach overkill in instance; rather use third.
update 2: (third method)
to locate literal strings , numbers, can use regexp:
private static final pattern const_pattern = pattern.compile("([^0-9a-za-z])((?:[0-9]+(?:\\.[0-9]*)?|[0-9]*\\.[0-9]+)" + "(?:[ee][+-][0-9]+])?" + "|(?:\\'[^']*\\')+)", pattern.case_insensitive);
you can parse query while generating following structure:
private static class parameterizedquery { final string sql; final parameter[] params; parameterizedquery(string sql, parameter[] params) { this.sql = sql; this.params = params.clone(); } } private static class parameter { final int position; final string value; parameter(int position, string value) { this.position = position; this.value = value; } }
the resulting sql query input query literals replaced question marks. parsing done follows:
private static parameterizedquery parse(string query) { list<parameter> parms = new arraylist<>(); matcher matcher = const_pattern.matcher(query); int start = 0; stringbuilder buf = new stringbuilder(); while (matcher.find()) { int pos = matcher.start(); buf.append(query, start, pos) .append(matcher.group(1)) .append("?"); parms.add(new parameter(buf.length()-1,matcher.group(2))); start = matcher.end(); } buf.append(query, start, query.length()); return new parameterizedquery( buf.tostring(), parms.toarray(new parameter[parms.size()])); }
now, if have list of queries, , want keep parameter not equal in input queries, parse queries, producing array of parameterizedquery, , simplify array:
private static parameterizedquery[] simplify(parameterizedquery[] queries) { if (queries.length == 0) { return queries; } parameterizedquery prev = null; boolean[] diff = null; (parameterizedquery cur: queries) { if (prev == null) { diff = new boolean[cur.params.length]; } else { if (!cur.sql.equals(prev.sql)) { throw new runtimeexception( "queries different: [" + prev.sql + "] , [" + cur.sql + "]"); } else if (cur.params.length != prev.params.length) { throw new runtimeexception( "different number of parameters: [" + prev.params.length + "] , [" + cur.params.length + "]"); } (int = 0; < diff.length; ++i) { if (!cur.params[i].value.equals(prev.params[i].value)) { diff[i] = true; } } } prev = cur; } if (and(diff)) { return queries; } parameterizedquery[] result = new parameterizedquery[queries.length]; result[0] = expandquery(queries[0].sql, queries[0].params, diff); (int = 1; < queries.length; ++i) { result[i] = new parameterizedquery(result[0].sql, keep(queries[i].params, result[0].params, diff)); } return result; } private static boolean and(boolean[] arr) { (boolean b: arr) { if (!b) { return false; } } return true; } private static parameterizedquery expandquery(string query, parameter[] params, boolean[] diff) { int count = 0; (boolean b: diff) { if (b) { ++count; } } parameter[] result = new parameter[count]; int r = 0; int start = 0; stringbuilder buf = new stringbuilder(); (int = 0; < diff.length; ++i) { parameter parm = params[i]; if (!diff[i]) { // expand param buf.append(query, start, parm.position); buf.append(parm.value); start = parm.position+1; } else { buf.append(query, start, parm.position); result[r++] = new parameter(buf.length(), parm.value); start = parm.position; } } buf.append(query, start, query.length()); return new parameterizedquery(buf.tostring(), result); } private static parameter[] keep(parameter[] params, parameter[] ref, boolean[] diff) { parameter[] result = new parameter[ref.length]; int j = 0; (int = 0; < params.length; ++i) { if (diff[i]) { result[j] = new parameter(ref[j].position, params[i].value); ++j; } } return result; }
here's program resolves example:
public class main { private static final string[] queries = { "select * tablename t1 t1.tablecolumnid=4 , t1.tablecolumnname='test' inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid", "select * tablename t1 t1.tablecolumnid=6 , t1.tablecolumnname='test' inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid", }; private static final pattern const_pattern = pattern.compile("([^0-9a-za-z])((?:[0-9]+(?:\\.[0-9]*)?|[0-9]*\\.[0-9]+)" + "(?:[ee][+-][0-9]+])?" + "|(?:\\'[^']*\\')+)", pattern.case_insensitive); private static class parameterizedquery { final string sql; final parameter[] params; parameterizedquery(string sql, parameter[] params) { this.sql = sql; this.params = params.clone(); } } private static class parameter { final int position; final string value; parameter(int position, string value) { this.position = position; this.value = value; } } public static void main(string[] args) { parameterizedquery[] queries = new parameterizedquery[queries.length]; (int = 0; < queries.length; ++i) { queries[i] = parse(queries[i]); } (parameterizedquery cur: queries) { system.out.println(cur.sql); int = 0; (parameter parm: cur.params) { system.out.println(" " + (++i) + ": " + parm.value); } } queries = simplify(queries); (parameterizedquery cur: queries) { system.out.println(cur.sql); int = 0; (parameter parm: cur.params) { system.out.println(" " + (++i) + ": " + parm.value); } } } private static parameterizedquery parse(string query) { list<parameter> parms = new arraylist<>(); matcher matcher = const_pattern.matcher(query); int start = 0; stringbuilder buf = new stringbuilder(); while (matcher.find()) { int pos = matcher.start(); buf.append(query, start, pos) .append(matcher.group(1)) .append("?"); parms.add(new parameter(buf.length()-1,matcher.group(2))); start = matcher.end(); } buf.append(query, start, query.length()); return new parameterizedquery( buf.tostring(), parms.toarray(new parameter[parms.size()])); } private static parameterizedquery[] simplify(parameterizedquery[] queries) { if (queries.length == 0) { return queries; } parameterizedquery prev = null; boolean[] diff = null; (parameterizedquery cur: queries) { if (prev == null) { diff = new boolean[cur.params.length]; } else { if (!cur.sql.equals(prev.sql)) { throw new runtimeexception( "queries different: [" + prev.sql + "] , [" + cur.sql + "]"); } else if (cur.params.length != prev.params.length) { throw new runtimeexception( "different number of parameters: [" + prev.params.length + "] , [" + cur.params.length + "]"); } (int = 0; < diff.length; ++i) { if (!cur.params[i].value.equals(prev.params[i].value)) { diff[i] = true; } } } prev = cur; } if (and(diff)) { return queries; } parameterizedquery[] result = new parameterizedquery[queries.length]; result[0] = expandquery(queries[0].sql, queries[0].params, diff); (int = 1; < queries.length; ++i) { result[i] = new parameterizedquery(result[0].sql, keep(queries[i].params, result[0].params, diff)); } return result; } private static boolean and(boolean[] arr) { (boolean b: arr) { if (!b) { return false; } } return true; } private static parameterizedquery expandquery(string query, parameter[] params, boolean[] diff) { int count = 0; (boolean b: diff) { if (b) { ++count; } } parameter[] result = new parameter[count]; int r = 0; int start = 0; stringbuilder buf = new stringbuilder(); (int = 0; < diff.length; ++i) { parameter parm = params[i]; if (!diff[i]) { // expand param buf.append(query, start, parm.position); buf.append(parm.value); start = parm.position+1; } else { buf.append(query, start, parm.position); result[r++] = new parameter(buf.length(), parm.value); start = parm.position; } } buf.append(query, start, query.length()); return new parameterizedquery(buf.tostring(), result); } private static parameter[] keep(parameter[] params, parameter[] ref, boolean[] diff) { parameter[] result = new parameter[ref.length]; int j = 0; (int = 0; < params.length; ++i) { if (diff[i]) { result[j] = new parameter(ref[j].position, params[i].value); ++j; } } return result; } }
the output is:
select * tablename t1 t1.tablecolumnid=? , t1.tablecolumnname=? inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid 1: 4 2: 'test' select * tablename t1 t1.tablecolumnid=? , t1.tablecolumnname=? inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid 1: 6 2: 'test' select * tablename t1 t1.tablecolumnid=? , t1.tablecolumnname='test' inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid 1: 4 select * tablename t1 t1.tablecolumnid=? , t1.tablecolumnname='test' inner join tablename2 t2 on t1.tablecolumnid=t2.tablecolumnid 1: 6
Comments
Post a Comment