001 package util.redshift; 002 003 import java.io.*; 004 import java.text.DecimalFormat; 005 import java.util.*; 006 007 /** 008 * <p>Create a redshift sorted list of quasars from Veron (2003) . 009 * The program will extract the emission emission line redshift 010 * column 71- 75.</p> 011 * By John Talbot, Oct 4, 2003 012 * 013 * <ul> 014 * <li>INPUT: table1.txt data file from 2003ApJS...87..451H</li> 015 * <li>OUTPUT: 016 * <ol> 017 * <li>list of quasars sorted by redshifts</li> 018 * <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and 019 * 128 A bin sizes.</li> 020 * <li>Histogram for one bin size and half-bin size shifted count</li> 021 * </ol> 022 * </li) 023 * </ul> 024 */ 025 public class Redshifts { 026 public static final double min = 0.0; // wavelength range with 512 data points at 0.01 z step 027 public static final double max = 5.12; // this upper bound for redshift excludes only 12 quasars 028 public static final int cutoff = 12; // number of quasars which fall outside the min to max redshift range 029 public static final DecimalFormat zFormatter = new DecimalFormat("0.00000"); // redshift precision 030 031 public static void main(String[] args) { 032 try { 033 List<Quasar> quasars = Quasar.readQuasars(); 034 // optional tasks : 035 getStatistics(quasars, "z"); 036 // Generate an equal number of random lines to verify distribution 037 List<Quasar> randomQuasars = getRandomQuasars(quasars, quasars.size() - cutoff); 038 getStatistics(randomQuasars, "randomz"); 039 int numberOfSimulations = 1000; 040 getVarianceOfRandomQuasars(randomQuasars, numberOfSimulations); 041 } catch (Exception e) { 042 System.out.println("Exception" + e); 043 System.exit(0); 044 } 045 } 046 047 public static void getVarianceOfRandomQuasars(List<Quasar> quasars, int aNumberOfSimulations) throws java.io.IOException { 048 int smallBinSize = 8192; 049 int maximum[] = new int[smallBinSize]; 050 int minimum[] = new int[smallBinSize]; 051 int sum[] = new int[smallBinSize]; 052 int sum2[] = new int[smallBinSize]; 053 java.util.Arrays.fill(minimum, Integer.MAX_VALUE); // set to upper bound for integers for collecting minima 054 for (int n = 0; n < aNumberOfSimulations; n++) { 055 List<Quasar> randomQuasars = getRandomQuasars(quasars, quasars.size() - cutoff); 056 057 // produce dz = (5.12/8192) = 0.000625 bin size histogram of random lines 058 int histogram[] = new int[smallBinSize]; 059 for (Quasar qso : randomQuasars) { 060 double z = qso.getZem(); 061 int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min)); 062 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 063 } 064 065 // compute bin dz = 8 * 0.000625 = .005 histogram of random lines and save min, max, sum and sum2 066 int histogram8[] = new int[smallBinSize]; 067 for (int i = 0; i < smallBinSize; i++) { 068 int box = 0; 069 int j = 8; // Choose bin size 8 * 0.000625 = .005 070 for (int k = 0; k < j; k++) { // centered sum 071 int offset = i + k - j/2; 072 if (offset >= 0 && offset < smallBinSize) box += histogram[offset]; 073 } 074 histogram8[i] = box; 075 sum[i] += box; 076 sum2[i] += box*box; 077 if (box < minimum[i]) minimum[i] = box; 078 if (box > maximum[i]) maximum[i] = box; 079 } 080 } 081 082 // produce dz = (5.12/8192) = 0.000625 bin size histogram of observed redshifts 083 int realHistogram[] = new int[smallBinSize]; 084 for (Quasar qso : quasars) { 085 double z = qso.getZem(); 086 int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min)); 087 if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++; 088 } 089 090 // compute bin dz = 8 * 0.000625 = .005 histogram of observed redshifts 091 int realHistogram8[] = new int[smallBinSize]; 092 for (int i = 0; i < smallBinSize; i++) { 093 int box = 0; 094 int j = 8; // Choose bin size 8 * 0.000625 = .005 095 for (int k = 0; k < j; k++) { // centered sum 096 int offset = i + k - j/2; 097 if (offset >= 0 && offset < smallBinSize) box += realHistogram[offset]; 098 } 099 realHistogram8[i] = box; 100 } 101 102 103 FileWriter writer = new FileWriter("data/zvariance" + aNumberOfSimulations + ".txt"); 104 double average[] = new double[smallBinSize]; 105 double standarddev[]= new double[smallBinSize]; 106 for (int i = 0; i < smallBinSize; i++) { 107 average[i] = (double) sum[i] / (double) aNumberOfSimulations; 108 double s = (double) sum[i]; 109 double s2 = (double) sum2[i]; 110 double num = (double)aNumberOfSimulations; 111 112 standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0)); 113 114 double z = ((double)i)*(max-min) / ((double)smallBinSize) + min; 115 double significance = 0.0; // clip negative deviations to zero 116 double deviation = ((double) realHistogram8[i]) - average[i]; 117 if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i]; 118 119 writer.write(zFormatter.format(z) + " " + 120 formatFloatCount(average[i]) + " " + 121 formatCount(minimum[i]) + " " + 122 formatCount(maximum[i]) + " " + 123 formatFloatCount(average[i] + standarddev[i]) + " " + 124 formatFloatCount(average[i] + 2*standarddev[i]) + " " + 125 formatFloatCount(average[i] + 3*standarddev[i]) + " " + 126 formatFloatCount(average[i] + 4*standarddev[i]) + " " + 127 formatFloatCount(average[i] + 5*standarddev[i]) + " " + 128 formatCount(realHistogram8[i]) + " " + 129 formatFloatCount(significance) + " " + 130 "\n"); 131 132 } 133 writer.close(); 134 } 135 136 /** Get statistics from a list of quasar redshifts 137 * 138 * @param quasars quasars instances (each having a redshift property) 139 * @param fileName the filename 140 */ 141 public static void getStatistics(List<Quasar> quasars, String fileName) throws java.io.IOException { 142 int binSize = 8192; // should be named maxBinCount 143 144 int smallBinSize = 8192; // should be named maxSmallBinCount 145 double halfBinSize = (max - min) / ((double) (2 * binSize)); 146 int histogram[] = new int[smallBinSize]; 147 int histogram1[] = new int[binSize]; 148 int histogram2[] = new int[binSize]; 149 150 for (Quasar qso : quasars) { 151 double z = qso.getZem(); 152 int binElement = (int) (((double)binSize)*(z-min)/(max-min)); 153 if (binElement >= 0 && binElement < binSize) histogram1[binElement]++; 154 155 // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins) 156 binElement = (int) (((double)binSize)*(z-min-halfBinSize)/(max-min)); 157 if (binElement >= 0 && binElement < binSize) histogram2[binElement]++; 158 159 // produce 1 A histogram 160 binElement = (int) (((double)smallBinSize)*(z-min)/(max-min)); 161 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 162 } 163 164 // Output a single histogram for the specified bin size 165 166 FileWriter writer = new FileWriter("data/" + fileName + "histogram" + binSize + ".txt"); 167 for (int i = 0; i < binSize; i++) { 168 double z = ((double)i)*(max-min) / ((double)binSize) + min; 169 writer.write(zFormatter.format(z) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n"); 170 } 171 writer.close(); 172 173 // Output multiple histograms starting with bin size 5.12/8192 and increasing in powers of 2 174 175 writer = new FileWriter("data/" + fileName + "histogram.txt"); 176 for (int i = 0; i < smallBinSize; i++) { 177 double z = ((double)i)*(max-min) / ((double)smallBinSize) + min; 178 writer.write(zFormatter.format(z) + " "); 179 for (int j = 1; j < 256; j = j*2) { 180 int sum = 0; 181 // centered sum 182 for (int k = 0; k < j; k++) { 183 int offset = i + k - j/2; 184 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset]; 185 } 186 writer.write(formatCount(sum) + " "); 187 // TODO: append at end of line a code for each histogram 188 // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth) 189 } 190 writer.write("\n"); 191 } 192 writer.close(); 193 } 194 195 // TODO: Fix this to allow histogram counts greater than 999 196 public static String formatCount(int aCount) { 197 String count = Integer.toString(aCount); 198 switch (count.length()) { 199 case 1: count = " " + count; break; 200 case 2: count = " " + count; break; 201 case 3: count = " " + count; break; 202 case 4: count = " " + count; break; 203 case 5: 204 } 205 return count; 206 } 207 208 static final DecimalFormat fFormatter = new DecimalFormat("0.00"); 209 public static String formatFloatCount(double aCount) { 210 String count = fFormatter.format(aCount); 211 switch (count.length()) { 212 case 4: count = " " + count; 213 } 214 return count; 215 } 216 217 /** Monte-Carlo generation of a specified number of redshift randomly generated 218 * with the same distribution as bin delta z = 0.08 histogram which simulated 219 * the smoothed redshift distribution. 220 * 221 * @param quasars a List of Quasar instances containing actual redshift value. 222 * @param number the number of random redshifts to generate 223 * @return a list of quasar instances with redshifts having the same distribution as the input list 224 */ 225 public static List<Quasar> getRandomQuasars(List<Quasar> quasars, int number) { 226 227 int binSize = 8192; // should be named maxBinCount 228 229 // smallBinSize produces dz = 5.12/8192 = 0.000625 bin size for z = 0.00 - 5.12 range 230 int smallBinSize = 8192; // should be named maxSmallBinCount 231 int histogram[] = new int[smallBinSize]; 232 int cumulative[] = new int[smallBinSize]; 233 234 // produce 1 A bin size histogram 235 for (Iterator x = quasars.iterator(); x.hasNext(); ) { 236 Quasar qso = (Quasar) x.next(); 237 double z = qso.getZem(); 238 int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min)); 239 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 240 } 241 242 // Compute cumulative sum of probability distribution 243 int cumulativeSum = 0; 244 for (int i = 0; i < smallBinSize; i++) { 245 double z = ((double)i)*(max-min) / ((double)smallBinSize) + min; 246 int sum = 0; 247 int j = 128; // Choose bin size dz = 128 * 5.12/8192 = .08 histogram as probablity distribution 248 for (int k = 0; k < j; k++) { // centered sum 249 int offset = i + k - j/2; 250 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset]; 251 } 252 cumulativeSum += sum; 253 cumulative[i] = cumulativeSum; 254 } 255 256 // Normalize cumulative sum : 257 258 List<Quasar> randomQuasars = new LinkedList<Quasar>(); 259 260 // Generate random numbers with identical line density distribution 261 for (int i = 0; i < number; i++) { 262 // 1. Generate a random integer between 0 and cumulativeSum 263 int randint = (int) (Math.random() * ((double) cumulativeSum)); 264 //System.out.println("Random number = " + randint); 265 // 2. locate wavelength bin with equal value using binary search of cumulative distribution 266 int binElement = smallBinSize / 2; // initial bin number guess 267 boolean located = false; // flag indicating that we have found the bin 268 int interval = smallBinSize / 2; 269 while (!located) { 270 interval = interval / 2; 271 int sum = cumulative[binElement]; 272 273 if (randint == sum || interval == 0) located = true; 274 else if (randint < sum) binElement = binElement - interval; 275 else if (randint > sum) binElement = binElement + interval; 276 277 //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum); 278 } 279 // convert bin number to z 280 double z = ((double)binElement) * (max - min) / ((double)smallBinSize) + min; 281 Quasar qso = new Quasar(z); 282 randomQuasars.add(qso); 283 } 284 285 286 //don't sort random redshift list because we might want to pick a subset 287 //Collections.sort(randomLines); 288 return randomQuasars; 289 } 290 291 } 292