001package daikon.inv; 002 003import daikon.ProglangType; 004import daikon.VarInfo; 005import java.io.Serializable; 006import java.util.Arrays; 007import org.plumelib.util.LimitedSizeLongSet; 008 009// It is a thin wrapper around LimitedSizeLongSet. 010// (Actually, maybe it will just subclass that.) 011 012/** 013 * ValueSet stores a set of values. The implementation only stores integers. When adding a value, 014 * for efficiency its hash code is added rather than the value itself. If the set size exceeds a 015 * specified limit, then its rep is nulled. 016 * 017 * <p>The size of this class is used for efficient justification tests. 018 * 019 * <p>Relevant subclasses are: 020 * 021 * <ul> 022 * <li>ValueSetScalar 023 * <li>ValueSetFloat 024 * <li>ValueSetScalarArray 025 * <li>ValueSetFloatArray 026 * <li>ValueSetString 027 * <li>ValueSetStringArray 028 * </ul> 029 * 030 * These subclasses store a hashcode. 031 * 032 * <p><b>Caveat:</b> The size is an approximation, because if two values happen to have the same 033 * hash value, then the sets size reflects only one of them. (As an example, the hash codes of 0L 034 * and -1L are the same. This implementation has a special case to avoid that problem for long 035 * values, but the hash codes of the arrays {0L} and {-1L} are also the same and this implementation 036 * does not work around that problem.) 037 * 038 * <p>An alternative approach would be to store actual values, rather than approximating. That would 039 * use more space than the current implementation does, but it would give a more accurate 040 * approximation of the size, Here are some possible implementation approaches: 041 * 042 * <ul> 043 * <li>use a TreeSet with a special comparator 044 * <li>use a HashSet that stores wrappers, where the wrappers redefine hashCode. (That is 045 * necessary because arrays don't override {@code Object.hashCode}. 046 * </ul> 047 */ 048public abstract class ValueSet extends LimitedSizeLongSet implements Serializable, Cloneable { 049 // We are Serializable, so we specify a version to allow changes to 050 // method signatures without breaking serialization. If you add or 051 // remove fields, you should change this number to the current date. 052 static final long serialVersionUID = 20020122L; 053 054 protected ValueSet(int max_values) { 055 super(max_values); 056 } 057 058 // There is one ValueSet per variable (not one per slice or invariant), 059 // so pre-allocating an array with 44 slots should not be a problem. If 060 // it is, then change LimitedSizeLongSet to optionally not pre-allocate 061 // the entire array. 062 /** 063 * The number 44 comes from the fact that .9^44 < .01. So, if the confidence limit is .01 and 064 * the probability of a given event is set at .1, then 44 values is enough to demonstrate that 065 * never seeing the event is statistically justified (not a coincidence). 066 */ 067 static final int DEFAULT_MAX_VALUES = 44; 068 069 public static ValueSet factory(VarInfo var_info) { 070 ProglangType rep_type = var_info.rep_type; 071 boolean is_scalar = rep_type.isScalar(); 072 if (is_scalar) { 073 return new ValueSet.ValueSetScalar(DEFAULT_MAX_VALUES); 074 } else if (rep_type == ProglangType.INT_ARRAY) { 075 return new ValueSet.ValueSetScalarArray(DEFAULT_MAX_VALUES); 076 } else if (rep_type == ProglangType.DOUBLE) { 077 return new ValueSet.ValueSetFloat(DEFAULT_MAX_VALUES); 078 } else if (rep_type == ProglangType.DOUBLE_ARRAY) { 079 return new ValueSet.ValueSetFloatArray(DEFAULT_MAX_VALUES); 080 } else if (rep_type == ProglangType.STRING) { 081 return new ValueSet.ValueSetString(DEFAULT_MAX_VALUES); 082 } else if (rep_type == ProglangType.STRING_ARRAY) { 083 return new ValueSet.ValueSetStringArray(DEFAULT_MAX_VALUES); 084 } else { 085 throw new Error( 086 "Can't create ValueSet for " + var_info.name() + " with rep type " + rep_type); 087 } 088 } 089 090 /** Add the specified object (really, its hashcode) to the set. */ 091 public abstract void add(Object v1); 092 093 /** Add stats from the specified value set. */ 094 protected abstract void add_stats(ValueSet other); 095 096 /** Returns a short description of the values seen. */ 097 public abstract String repr_short(); 098 099 public void add(ValueSet other) { 100 if (this.getClass() != other.getClass()) { 101 throw new Error("ValueSet type mismatch: " + this.getClass() + " " + other.getClass()); 102 } 103 addAll(other); 104 add_stats(other); 105 } 106 107 public static class ValueSetScalar extends ValueSet { 108 // We are Serializable, so we specify a version to allow changes to 109 // method signatures without breaking serialization. If you add or 110 // remove fields, you should change this number to the current date. 111 static final long serialVersionUID = 20031017L; 112 113 long min_val = Long.MAX_VALUE; 114 long max_val = Long.MIN_VALUE; 115 116 public ValueSetScalar(int max_values) { 117 super(max_values); 118 } 119 120 @Override 121 public void add(Object v1) { 122 assert v1 != null; 123 long val = ((Long) v1).longValue(); 124 if (val < min_val) { 125 min_val = val; 126 } 127 if (val > max_val) { 128 max_val = val; 129 } 130 add(val); 131 } 132 133 @Override 134 protected void add_stats(ValueSet other) { 135 ValueSetScalar vs = (ValueSetScalar) other; 136 min_val = Math.min(min_val, vs.min_val); 137 max_val = Math.max(max_val, vs.max_val); 138 } 139 140 public long min() { 141 return min_val; 142 } 143 144 public long max() { 145 return max_val; 146 } 147 148 @Override 149 public String repr_short() { 150 if (size() > 0) { 151 return (size() + " values " + min_val + ".." + max_val); 152 } else { 153 return "0 values"; 154 } 155 } 156 } 157 158 public static class ValueSetFloat extends ValueSet { 159 // We are Serializable, so we specify a version to allow changes to 160 // method signatures without breaking serialization. If you add or 161 // remove fields, you should change this number to the current date. 162 static final long serialVersionUID = 20031017L; 163 164 double min_val = Double.MAX_VALUE; 165 double max_val = -Double.MAX_VALUE; 166 boolean can_be_NaN = false; 167 168 public ValueSetFloat(int max_values) { 169 super(max_values); 170 } 171 172 @Override 173 public void add(Object v1) { 174 assert v1 != null; 175 double val = ((Double) v1).doubleValue(); 176 if (val < min_val) { 177 min_val = val; 178 } 179 if (val > max_val) { 180 max_val = val; 181 } 182 if (Double.isNaN(val)) { 183 can_be_NaN = true; 184 // use the canonical NaN value 185 val = Double.NaN; 186 } 187 add(Double.doubleToLongBits(val)); 188 } 189 190 @Override 191 protected void add_stats(ValueSet other) { 192 ValueSetFloat vs = (ValueSetFloat) other; 193 min_val = Math.min(min_val, vs.min_val); 194 max_val = Math.max(max_val, vs.max_val); 195 can_be_NaN = can_be_NaN || vs.can_be_NaN; 196 } 197 198 public double min() { 199 return min_val; 200 } 201 202 public double max() { 203 return max_val; 204 } 205 206 public boolean canBeNaN() { 207 return can_be_NaN; 208 } 209 210 @Override 211 public String repr_short() { 212 if (size() > 0) { 213 return (size() 214 + " values " 215 + min_val 216 + ".." 217 + max_val 218 + "; " 219 + (can_be_NaN ? "can be " : "never ") 220 + "NaN"); 221 } else { 222 return "0 values"; 223 } 224 } 225 } 226 227 public static class ValueSetScalarArray extends ValueSet { 228 // We are Serializable, so we specify a version to allow changes to 229 // method signatures without breaking serialization. If you add or 230 // remove fields, you should change this number to the current date. 231 static final long serialVersionUID = 20031017L; 232 233 long min_val = Long.MAX_VALUE; 234 long max_val = Long.MIN_VALUE; 235 int max_length = 0; 236 int elem_cnt = 0; 237 int nonsingleton_arr_cnt = 0; // number of arrays with 2 or more elements 238 239 public ValueSetScalarArray(int max_values) { 240 super(max_values); 241 } 242 243 @Override 244 public void add(Object v1) { 245 assert v1 != null; 246 long[] val = (long[]) v1; 247 for (int i = 0; i < val.length; i++) { 248 if (val[i] < min_val) { 249 min_val = val[i]; 250 } 251 if (val[i] > max_val) { 252 max_val = val[i]; 253 } 254 } 255 elem_cnt += val.length; 256 if (val.length > 1) nonsingleton_arr_cnt++; 257 if (val.length > max_length) max_length = val.length; 258 add(Arrays.hashCode((long[]) v1)); 259 } 260 261 @Override 262 protected void add_stats(ValueSet other) { 263 ValueSetScalarArray vs = (ValueSetScalarArray) other; 264 min_val = Math.min(min_val, vs.min_val); 265 max_val = Math.max(max_val, vs.max_val); 266 elem_cnt += vs.elem_cnt; 267 nonsingleton_arr_cnt += vs.nonsingleton_arr_cnt; 268 max_length = Math.max(max_length, vs.max_length); 269 } 270 271 public long min() { 272 return min_val; 273 } 274 275 public long max() { 276 return max_val; 277 } 278 279 public int elem_cnt() { 280 return elem_cnt; 281 } 282 283 public int nonsingleton_arr_cnt() { 284 return nonsingleton_arr_cnt; 285 } 286 287 public int max_length() { 288 return max_length; 289 } 290 291 @Override 292 public String repr_short() { 293 if (size() > 0) { 294 return (size() + " values " + min_val + ".." + max_val); 295 } else { 296 return "0 values"; 297 } 298 } 299 } 300 301 public static class ValueSetFloatArray extends ValueSet { 302 // We are Serializable, so we specify a version to allow changes to 303 // method signatures without breaking serialization. If you add or 304 // remove fields, you should change this number to the current date. 305 static final long serialVersionUID = 20031017L; 306 307 double min_val = Long.MAX_VALUE; 308 double max_val = Long.MIN_VALUE; 309 boolean can_be_NaN = false; 310 int max_length = 0; 311 int elem_cnt = 0; 312 int nonsingleton_arr_cnt = 0; // number of arrays with 2 or more elements 313 314 public ValueSetFloatArray(int max_values) { 315 super(max_values); 316 } 317 318 @Override 319 public void add(Object v1) { 320 assert v1 != null; 321 double[] val = (double[]) v1; 322 for (int i = 0; i < val.length; i++) { 323 if (val[i] < min_val) { 324 min_val = val[i]; 325 } 326 if (val[i] > max_val) { 327 max_val = val[i]; 328 } 329 if (Double.isNaN(val[i])) { 330 can_be_NaN = true; 331 } 332 } 333 elem_cnt += val.length; 334 if (val.length > 1) nonsingleton_arr_cnt++; 335 if (val.length > max_length) max_length = val.length; 336 add(Arrays.hashCode(val)); 337 } 338 339 @Override 340 protected void add_stats(ValueSet other) { 341 ValueSetFloatArray vs = (ValueSetFloatArray) other; 342 min_val = Math.min(min_val, vs.min_val); 343 max_val = Math.max(max_val, vs.max_val); 344 can_be_NaN = can_be_NaN || vs.can_be_NaN; 345 elem_cnt += vs.elem_cnt; 346 nonsingleton_arr_cnt += vs.nonsingleton_arr_cnt; 347 max_length = Math.max(max_length, vs.max_length); 348 } 349 350 public double min() { 351 return min_val; 352 } 353 354 public double max() { 355 return max_val; 356 } 357 358 public boolean canBeNaN() { 359 return can_be_NaN; 360 } 361 362 public int elem_cnt() { 363 return elem_cnt; 364 } 365 366 public int nonsingleton_arr_cnt() { 367 return nonsingleton_arr_cnt; 368 } 369 370 public int max_length() { 371 return max_length; 372 } 373 374 @Override 375 public String repr_short() { 376 if (size() > 0) { 377 return (size() 378 + " values " 379 + min_val 380 + ".." 381 + max_val 382 + "; " 383 + (can_be_NaN ? "can be " : "never ") 384 + "NaN"); 385 } else { 386 return "0 values"; 387 } 388 } 389 } 390 391 public static class ValueSetString extends ValueSet { 392 // We are Serializable, so we specify a version to allow changes to 393 // method signatures without breaking serialization. If you add or 394 // remove fields, you should change this number to the current date. 395 static final long serialVersionUID = 20031017L; 396 397 public ValueSetString(int max_values) { 398 super(max_values); 399 } 400 401 @Override 402 public void add(Object v1) { 403 assert v1 != null; 404 add(((String) v1).hashCode()); 405 } 406 407 @Override 408 protected void add_stats(ValueSet other) {} 409 410 @Override 411 public String repr_short() { 412 return (size() + " values "); 413 } 414 } 415 416 public static class ValueSetStringArray extends ValueSet { 417 // We are Serializable, so we specify a version to allow changes to 418 // method signatures without breaking serialization. If you add or 419 // remove fields, you should change this number to the current date. 420 static final long serialVersionUID = 20031017L; 421 422 int elem_cnt = 0; 423 int nonsingleton_arr_cnt = 0; // number of arrays with 2 or more elements 424 425 public ValueSetStringArray(int max_values) { 426 super(max_values); 427 } 428 429 @Override 430 public void add(Object v1) { 431 assert v1 != null; 432 String[] val = (String[]) v1; 433 elem_cnt += val.length; 434 if (val.length > 1) nonsingleton_arr_cnt++; 435 add(Arrays.deepHashCode(val)); 436 } 437 438 @Override 439 protected void add_stats(ValueSet other) { 440 ValueSetStringArray vs = (ValueSetStringArray) other; 441 elem_cnt += vs.elem_cnt; 442 nonsingleton_arr_cnt += vs.nonsingleton_arr_cnt; 443 } 444 445 public int elem_cnt() { 446 return elem_cnt; 447 } 448 449 public int nonsingleton_arr_cnt() { 450 return nonsingleton_arr_cnt; 451 } 452 453 @Override 454 public String repr_short() { 455 return (size() + " values "); 456 } 457 } 458}