001package daikon.inv;
002
003import daikon.ProglangType;
004import daikon.VarInfo;
005import java.io.Serializable;
006import java.util.Arrays;
007import org.plumelib.util.LimitedSizeLongSet;
008
009// It is a thin wrapper around LimitedSizeLongSet.
010// (Actually, maybe it will just subclass that.)
011
012/**
013 * ValueSet stores a set of values. The implementation only stores integers. When adding a value,
014 * for efficiency its hash code is added rather than the value itself. If the set size exceeds a
015 * specified limit, then its rep is nulled.
016 *
017 * <p>The size of this class is used for efficient justification tests.
018 *
019 * <p>Relevant subclasses are:
020 *
021 * <ul>
022 *   <li>ValueSetScalar
023 *   <li>ValueSetFloat
024 *   <li>ValueSetScalarArray
025 *   <li>ValueSetFloatArray
026 *   <li>ValueSetString
027 *   <li>ValueSetStringArray
028 * </ul>
029 *
030 * These subclasses store a hashcode.
031 *
032 * <p><b>Caveat:</b> The size is an approximation, because if two values happen to have the same
033 * hash value, then the sets size reflects only one of them. (As an example, the hash codes of 0L
034 * and -1L are the same. This implementation has a special case to avoid that problem for long
035 * values, but the hash codes of the arrays {0L} and {-1L} are also the same and this implementation
036 * does not work around that problem.)
037 *
038 * <p>An alternative approach would be to store actual values, rather than approximating. That would
039 * use more space than the current implementation does, but it would give a more accurate
040 * approximation of the size, Here are some possible implementation approaches:
041 *
042 * <ul>
043 *   <li>use a TreeSet with a special comparator
044 *   <li>use a HashSet that stores wrappers, where the wrappers redefine hashCode. (That is
045 *       necessary because arrays don't override {@code Object.hashCode}.
046 * </ul>
047 */
048public abstract class ValueSet extends LimitedSizeLongSet implements Serializable, Cloneable {
049  // We are Serializable, so we specify a version to allow changes to
050  // method signatures without breaking serialization.  If you add or
051  // remove fields, you should change this number to the current date.
052  static final long serialVersionUID = 20020122L;
053
054  protected ValueSet(int max_values) {
055    super(max_values);
056  }
057
058  // There is one ValueSet per variable (not one per slice or invariant),
059  // so pre-allocating an array with 44 slots should not be a problem.  If
060  // it is, then change LimitedSizeLongSet to optionally not pre-allocate
061  // the entire array.
062  /**
063   * The number 44 comes from the fact that .9^44 &lt; .01. So, if the confidence limit is .01 and
064   * the probability of a given event is set at .1, then 44 values is enough to demonstrate that
065   * never seeing the event is statistically justified (not a coincidence).
066   */
067  static final int DEFAULT_MAX_VALUES = 44;
068
069  public static ValueSet factory(VarInfo var_info) {
070    ProglangType rep_type = var_info.rep_type;
071    boolean is_scalar = rep_type.isScalar();
072    if (is_scalar) {
073      return new ValueSet.ValueSetScalar(DEFAULT_MAX_VALUES);
074    } else if (rep_type == ProglangType.INT_ARRAY) {
075      return new ValueSet.ValueSetScalarArray(DEFAULT_MAX_VALUES);
076    } else if (rep_type == ProglangType.DOUBLE) {
077      return new ValueSet.ValueSetFloat(DEFAULT_MAX_VALUES);
078    } else if (rep_type == ProglangType.DOUBLE_ARRAY) {
079      return new ValueSet.ValueSetFloatArray(DEFAULT_MAX_VALUES);
080    } else if (rep_type == ProglangType.STRING) {
081      return new ValueSet.ValueSetString(DEFAULT_MAX_VALUES);
082    } else if (rep_type == ProglangType.STRING_ARRAY) {
083      return new ValueSet.ValueSetStringArray(DEFAULT_MAX_VALUES);
084    } else {
085      throw new Error(
086          "Can't create ValueSet for " + var_info.name() + " with rep type " + rep_type);
087    }
088  }
089
090  /** Add the specified object (really, its hashcode) to the set. */
091  public abstract void add(Object v1);
092
093  /** Add stats from the specified value set. */
094  protected abstract void add_stats(ValueSet other);
095
096  /** Returns a short description of the values seen. */
097  public abstract String repr_short();
098
099  public void add(ValueSet other) {
100    if (this.getClass() != other.getClass()) {
101      throw new Error("ValueSet type mismatch: " + this.getClass() + " " + other.getClass());
102    }
103    addAll(other);
104    add_stats(other);
105  }
106
107  public static class ValueSetScalar extends ValueSet {
108    // We are Serializable, so we specify a version to allow changes to
109    // method signatures without breaking serialization.  If you add or
110    // remove fields, you should change this number to the current date.
111    static final long serialVersionUID = 20031017L;
112
113    long min_val = Long.MAX_VALUE;
114    long max_val = Long.MIN_VALUE;
115
116    public ValueSetScalar(int max_values) {
117      super(max_values);
118    }
119
120    @Override
121    public void add(Object v1) {
122      assert v1 != null;
123      long val = ((Long) v1).longValue();
124      if (val < min_val) {
125        min_val = val;
126      }
127      if (val > max_val) {
128        max_val = val;
129      }
130      add(val);
131    }
132
133    @Override
134    protected void add_stats(ValueSet other) {
135      ValueSetScalar vs = (ValueSetScalar) other;
136      min_val = Math.min(min_val, vs.min_val);
137      max_val = Math.max(max_val, vs.max_val);
138    }
139
140    public long min() {
141      return min_val;
142    }
143
144    public long max() {
145      return max_val;
146    }
147
148    @Override
149    public String repr_short() {
150      if (size() > 0) {
151        return (size() + " values " + min_val + ".." + max_val);
152      } else {
153        return "0 values";
154      }
155    }
156  }
157
158  public static class ValueSetFloat extends ValueSet {
159    // We are Serializable, so we specify a version to allow changes to
160    // method signatures without breaking serialization.  If you add or
161    // remove fields, you should change this number to the current date.
162    static final long serialVersionUID = 20031017L;
163
164    double min_val = Double.MAX_VALUE;
165    double max_val = -Double.MAX_VALUE;
166    boolean can_be_NaN = false;
167
168    public ValueSetFloat(int max_values) {
169      super(max_values);
170    }
171
172    @Override
173    public void add(Object v1) {
174      assert v1 != null;
175      double val = ((Double) v1).doubleValue();
176      if (val < min_val) {
177        min_val = val;
178      }
179      if (val > max_val) {
180        max_val = val;
181      }
182      if (Double.isNaN(val)) {
183        can_be_NaN = true;
184        // use the canonical NaN value
185        val = Double.NaN;
186      }
187      add(Double.doubleToLongBits(val));
188    }
189
190    @Override
191    protected void add_stats(ValueSet other) {
192      ValueSetFloat vs = (ValueSetFloat) other;
193      min_val = Math.min(min_val, vs.min_val);
194      max_val = Math.max(max_val, vs.max_val);
195      can_be_NaN = can_be_NaN || vs.can_be_NaN;
196    }
197
198    public double min() {
199      return min_val;
200    }
201
202    public double max() {
203      return max_val;
204    }
205
206    public boolean canBeNaN() {
207      return can_be_NaN;
208    }
209
210    @Override
211    public String repr_short() {
212      if (size() > 0) {
213        return (size()
214            + " values "
215            + min_val
216            + ".."
217            + max_val
218            + "; "
219            + (can_be_NaN ? "can be " : "never ")
220            + "NaN");
221      } else {
222        return "0 values";
223      }
224    }
225  }
226
227  public static class ValueSetScalarArray extends ValueSet {
228    // We are Serializable, so we specify a version to allow changes to
229    // method signatures without breaking serialization.  If you add or
230    // remove fields, you should change this number to the current date.
231    static final long serialVersionUID = 20031017L;
232
233    long min_val = Long.MAX_VALUE;
234    long max_val = Long.MIN_VALUE;
235    int max_length = 0;
236    int elem_cnt = 0;
237    int nonsingleton_arr_cnt = 0; // number of arrays with 2 or more elements
238
239    public ValueSetScalarArray(int max_values) {
240      super(max_values);
241    }
242
243    @Override
244    public void add(Object v1) {
245      assert v1 != null;
246      long[] val = (long[]) v1;
247      for (int i = 0; i < val.length; i++) {
248        if (val[i] < min_val) {
249          min_val = val[i];
250        }
251        if (val[i] > max_val) {
252          max_val = val[i];
253        }
254      }
255      elem_cnt += val.length;
256      if (val.length > 1) nonsingleton_arr_cnt++;
257      if (val.length > max_length) max_length = val.length;
258      add(Arrays.hashCode((long[]) v1));
259    }
260
261    @Override
262    protected void add_stats(ValueSet other) {
263      ValueSetScalarArray vs = (ValueSetScalarArray) other;
264      min_val = Math.min(min_val, vs.min_val);
265      max_val = Math.max(max_val, vs.max_val);
266      elem_cnt += vs.elem_cnt;
267      nonsingleton_arr_cnt += vs.nonsingleton_arr_cnt;
268      max_length = Math.max(max_length, vs.max_length);
269    }
270
271    public long min() {
272      return min_val;
273    }
274
275    public long max() {
276      return max_val;
277    }
278
279    public int elem_cnt() {
280      return elem_cnt;
281    }
282
283    public int nonsingleton_arr_cnt() {
284      return nonsingleton_arr_cnt;
285    }
286
287    public int max_length() {
288      return max_length;
289    }
290
291    @Override
292    public String repr_short() {
293      if (size() > 0) {
294        return (size() + " values " + min_val + ".." + max_val);
295      } else {
296        return "0 values";
297      }
298    }
299  }
300
301  public static class ValueSetFloatArray extends ValueSet {
302    // We are Serializable, so we specify a version to allow changes to
303    // method signatures without breaking serialization.  If you add or
304    // remove fields, you should change this number to the current date.
305    static final long serialVersionUID = 20031017L;
306
307    double min_val = Long.MAX_VALUE;
308    double max_val = Long.MIN_VALUE;
309    boolean can_be_NaN = false;
310    int max_length = 0;
311    int elem_cnt = 0;
312    int nonsingleton_arr_cnt = 0; // number of arrays with 2 or more elements
313
314    public ValueSetFloatArray(int max_values) {
315      super(max_values);
316    }
317
318    @Override
319    public void add(Object v1) {
320      assert v1 != null;
321      double[] val = (double[]) v1;
322      for (int i = 0; i < val.length; i++) {
323        if (val[i] < min_val) {
324          min_val = val[i];
325        }
326        if (val[i] > max_val) {
327          max_val = val[i];
328        }
329        if (Double.isNaN(val[i])) {
330          can_be_NaN = true;
331        }
332      }
333      elem_cnt += val.length;
334      if (val.length > 1) nonsingleton_arr_cnt++;
335      if (val.length > max_length) max_length = val.length;
336      add(Arrays.hashCode(val));
337    }
338
339    @Override
340    protected void add_stats(ValueSet other) {
341      ValueSetFloatArray vs = (ValueSetFloatArray) other;
342      min_val = Math.min(min_val, vs.min_val);
343      max_val = Math.max(max_val, vs.max_val);
344      can_be_NaN = can_be_NaN || vs.can_be_NaN;
345      elem_cnt += vs.elem_cnt;
346      nonsingleton_arr_cnt += vs.nonsingleton_arr_cnt;
347      max_length = Math.max(max_length, vs.max_length);
348    }
349
350    public double min() {
351      return min_val;
352    }
353
354    public double max() {
355      return max_val;
356    }
357
358    public boolean canBeNaN() {
359      return can_be_NaN;
360    }
361
362    public int elem_cnt() {
363      return elem_cnt;
364    }
365
366    public int nonsingleton_arr_cnt() {
367      return nonsingleton_arr_cnt;
368    }
369
370    public int max_length() {
371      return max_length;
372    }
373
374    @Override
375    public String repr_short() {
376      if (size() > 0) {
377        return (size()
378            + " values "
379            + min_val
380            + ".."
381            + max_val
382            + "; "
383            + (can_be_NaN ? "can be " : "never ")
384            + "NaN");
385      } else {
386        return "0 values";
387      }
388    }
389  }
390
391  public static class ValueSetString extends ValueSet {
392    // We are Serializable, so we specify a version to allow changes to
393    // method signatures without breaking serialization.  If you add or
394    // remove fields, you should change this number to the current date.
395    static final long serialVersionUID = 20031017L;
396
397    public ValueSetString(int max_values) {
398      super(max_values);
399    }
400
401    @Override
402    public void add(Object v1) {
403      assert v1 != null;
404      add(((String) v1).hashCode());
405    }
406
407    @Override
408    protected void add_stats(ValueSet other) {}
409
410    @Override
411    public String repr_short() {
412      return (size() + " values ");
413    }
414  }
415
416  public static class ValueSetStringArray extends ValueSet {
417    // We are Serializable, so we specify a version to allow changes to
418    // method signatures without breaking serialization.  If you add or
419    // remove fields, you should change this number to the current date.
420    static final long serialVersionUID = 20031017L;
421
422    int elem_cnt = 0;
423    int nonsingleton_arr_cnt = 0; // number of arrays with 2 or more elements
424
425    public ValueSetStringArray(int max_values) {
426      super(max_values);
427    }
428
429    @Override
430    public void add(Object v1) {
431      assert v1 != null;
432      String[] val = (String[]) v1;
433      elem_cnt += val.length;
434      if (val.length > 1) nonsingleton_arr_cnt++;
435      add(Arrays.deepHashCode(val));
436    }
437
438    @Override
439    protected void add_stats(ValueSet other) {
440      ValueSetStringArray vs = (ValueSetStringArray) other;
441      elem_cnt += vs.elem_cnt;
442      nonsingleton_arr_cnt += vs.nonsingleton_arr_cnt;
443    }
444
445    public int elem_cnt() {
446      return elem_cnt;
447    }
448
449    public int nonsingleton_arr_cnt() {
450      return nonsingleton_arr_cnt;
451    }
452
453    @Override
454    public String repr_short() {
455      return (size() + " values ");
456    }
457  }
458}