summaryrefslogtreecommitdiffstats
path: root/sql/src/main/java/SQLite/StringEncoder.java
blob: c2f20adcc34c9c01684355c16ca89065787f49ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
package SQLite;

/**
 * String encoder/decoder for SQLite.
 *
 * This module was kindly donated by Eric van der Maarel of Nedap N.V.
 *
 * This encoder was implemented based on an original idea from an anonymous
 * author in the source code of the SQLite distribution.
 * I feel obliged to provide a quote from the original C-source code:
 *
 * "The author disclaims copyright to this source code.  In place of
 *  a legal notice, here is a blessing:
 *
 *     May you do good and not evil.
 *     May you find forgiveness for yourself and forgive others.
 *     May you share freely, never taking more than you give."
 *
 */

public class StringEncoder {

    /** Reserved character that introduces a two-character escape sequence. */
    private static final char ESCAPE = 1;

    /**
     * Encodes the given byte array into a string that can be used by
     * the SQLite database. The database cannot handle null (0x00) and
     * the character '\'' (0x27). The encoding consists of escaping
     * these characters with a reserved character (0x01). The escaping
     * is applied after determining and applying a shift that minimizes
     * the number of escapes required.
     * With this encoding the data of original size n is increased to a
     * maximum of 1+(n*257)/254.
     * For sufficiently large n the overhead is thus less than 1.2%.
     *
     * The first character of the result is the shift itself; every
     * following character is in the range 0x01..0xff and encodes
     * (possibly together with an escape prefix) one input byte.
     *
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string. When an empty array is
     *     provided a string of length 1 is returned, the value of
     *     which is bogus.
     *     When decoded with this class' <code>decode</code> method
     *     a string of size 1 will return an empty byte array.
     */
    public static String encode(byte[] a) {
        if (a == null || a.length == 0) {
            // bogus shift, no data
            return "x";
        }

        // Histogram of the unsigned byte values in the input.
        int[] cnt = new int[256];
        for (int i = 0; i < a.length; i++) {
            cnt[a[i] & 0xff]++;
        }

        // Pick the shift that needs the fewest escapes. After shifting, a
        // byte must be escaped when it lands on 0, 1 or '\'', i.e. when its
        // original value is shift, shift+1 or shift+'\'' (mod 256).
        // The shift is emitted as the first output character, so it may be
        // neither 0 nor '\'' itself.
        int shift = 1;
        int nEscapes = a.length;
        for (int i = 1; i < 256; i++) {
            if (i == '\'') {
                continue;
            }
            int sum = cnt[i] + cnt[(i + 1) & 0xff] + cnt[(i + '\'') & 0xff];
            if (sum < nEscapes) {
                nEscapes = sum;
                shift = i;
                if (nEscapes == 0) {
                    // cannot become smaller
                    break;
                }
            }
        }

        // One character for the shift, one per byte, one extra per escape.
        // The buffer is method-local, so an unsynchronized builder suffices.
        StringBuilder out = new StringBuilder(a.length + nEscapes + 1);
        out.append((char) shift);
        for (int i = 0; i < a.length; i++) {
            char c = (char) ((a[i] - shift) & 0xff);
            if (c == 0) { // forbidden
                out.append(ESCAPE).append((char) 1);
            } else if (c == ESCAPE) { // the escape character itself
                out.append(ESCAPE).append((char) 2);
            } else if (c == '\'') { // forbidden
                out.append(ESCAPE).append((char) 3);
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Decodes the given string that is assumed to be a valid encoding
     * of a byte array. Typically the given string is generated by
     * this class' <code>encode</code> or <code>encodeX</code> method.
     *
     * A string of the form <code>X'..'</code> is treated as SQLite3 BLOB
     * notation and its content is decoded as pairs of hexadecimal digits
     * (upper or lower case). Any other string is decoded as a
     * shift/escape encoding whose first character is the shift.
     *
     * @param s the given string encoding.
     * @return the byte array obtained from the decoding.
     * @throws IllegalArgumentException when the string given is not
     *    a valid encoded string for this encoder: it is empty, it
     *    contains an unknown or truncated escape sequence, or it is in
     *    BLOB notation with an odd number of digits or a non-hex digit.
     * @throws NullPointerException when <code>s</code> is null.
     */
    public static byte[] decode(String s) {
        int len = s.length();
        if (len == 0) {
            // Even an empty array is encoded as one (shift) character.
            throw new IllegalArgumentException(
                "invalid string passed to decoder: empty string");
        }
        if (len > 2 && s.charAt(0) == 'X'
                && s.charAt(1) == '\'' && s.charAt(len - 1) == '\'') {
            // SQLite3 BLOB syntax. encode() never emits '\'', so this
            // cannot be confused with a shift/escape encoded string.
            return decodeX(s);
        }

        // first element is the shift
        int shift = s.charAt(0);
        // Upper bound: every remaining character decodes to at most one byte.
        byte[] result = new byte[len - 1];
        int i = 1;
        int j = 0;
        while (i < len) {
            int c = s.charAt(i++);
            if (c == ESCAPE) {
                if (i >= len) {
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: "
                        + "truncated escape sequence at " + j);
                }
                switch (s.charAt(i++)) {
                case 1:
                    c = 0;
                    break;
                case 2:
                    c = ESCAPE;
                    break;
                case 3:
                    c = '\'';
                    break;
                default:
                    throw new IllegalArgumentException(
                        "invalid string passed to decoder: " + j);
                }
            }
            // undo the shift applied by encode()
            result[j++] = (byte) ((c + shift) & 0xff);
        }

        // Escapes consumed two characters per byte; trim to the real length.
        if (j != result.length) {
            byte[] trimmed = new byte[j];
            System.arraycopy(result, 0, trimmed, 0, j);
            result = trimmed;
        }
        return result;
    }

    /**
     * Decodes a string in SQLite3 BLOB notation. The caller has already
     * verified that the string starts with <code>X'</code> and ends
     * with <code>'</code>.
     *
     * @param s the string in <code>X'..'</code> form.
     * @return the decoded bytes, one per pair of hex digits.
     * @throws IllegalArgumentException when the number of digits is odd
     *    or a character between the quotes is not a hex digit.
     */
    private static byte[] decodeX(String s) {
        // Everything except the leading X' and the trailing '.
        int nDigits = s.length() - 3;
        if ((nDigits & 1) != 0) {
            throw new IllegalArgumentException(
                "invalid string passed to decoder: "
                + "odd number of hex digits in BLOB literal");
        }
        byte[] result = new byte[nDigits / 2];
        for (int i = 2, k = 0; k < result.length; i += 2, k++) {
            int hi = hexValue(s.charAt(i));
            int lo = hexValue(s.charAt(i + 1));
            if (hi < 0 || lo < 0) {
                throw new IllegalArgumentException(
                    "invalid string passed to decoder: "
                    + "bad hex digit in BLOB literal at " + k);
            }
            result[k] = (byte) ((hi << 4) | lo);
        }
        return result;
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit.
     *
     * @param c the character to convert.
     * @return 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 otherwise.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }


    /** Upper-case hexadecimal digits, indexed by nibble value 0..15. */
    static final char[] xdigits = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Encodes the given byte array into SQLite3 blob notation, ie X'..'
     * Every byte is written as two upper-case hexadecimal digits.
     *
     * @param a the byte array to be encoded. A null reference is handled as
     *     an empty array.
     * @return the encoded bytes as a string.
     */
    public static String encodeX(byte[] a) {
        if (a == null || a.length == 0) {
            return "X''";
        }
        // Two digits per byte plus the X'..' wrapper.
        StringBuilder out = new StringBuilder(2 * a.length + 3);
        out.append('X');
        out.append('\'');
        for (int i = 0; i < a.length; i++) {
            // Mask first: a byte is sign-extended when promoted to int, so
            // shifting the raw value would give a negative index for
            // bytes >= 0x80.
            int b = a[i] & 0xff;
            out.append(xdigits[b >>> 4]);
            out.append(xdigits[b & 0x0F]);
        }
        out.append('\'');
        return out.toString();
    }
}