001 package util; 002 003 import java.io.*; 004 import java.text.DecimalFormat; 005 import java.util.*; 006 import java.lang.Math.*; 007 008 /** 009 * <p>Create a wavelength sorted list of quasar emission lines in observed 010 * reference frame from Hewitt and Burbidge 1993ApJS...87..451H . 011 * The program will extract the emission line from column 81-85 (in quasar 012 * restframe) then 'redshift' it back to the observed frame by multiplying 013 * it by 1 + z(em) where the emission redshift is from columns 68-73 (ends 014 * with first non-numeric char or whitespace).</p> 015 * By John Talbot, Sep 16, 2003 016 * 017 * <ul> 018 * <li>INPUT: table1.txt data file from 1993ApJS...87..451H, with page headers 019 * removed and ZZZ as the last line</li> 020 * <li>OUTPUT: 021 * <ol> 022 * <li>list sorted by wavelength of observed frame emission lines with the 023 * quasar in which it occurs</li> 024 * <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and 025 * 128 A bin sizes.</li> 026 * <li>Histogram for one bin size and half-bin size shifted count</li> 027 * </ol> 028 * </li) 029 * </ul> 030 */ 031 public class EmissionLines { 032 public static void main(String[] args) { 033 try { 034 List<Line> lines = getEmissionLines(); 035 // optional tasks : 036 getStatistics(lines, ""); 037 // Generate an equal number of random lines to verify distribution 038 List<Line> randomLines = getRandomLines(lines, lines.size() - 28); 039 getStatistics(randomLines, "random"); 040 int numberOfSimulations = 1000; 041 getVarianceOfRandomLines(lines, numberOfSimulations); 042 } catch (Exception e) { 043 System.out.println("Exception" + e); 044 System.exit(0); 045 } 046 } 047 048 public static void getVarianceOfRandomLines(List<Line> lines, int aNumberOfSimulations) throws java.io.IOException { 049 int smallBinSize = 8192; 050 int maximum[] = new int[smallBinSize]; 051 int minimum[] = new int[smallBinSize]; 052 int sum[] = new int[smallBinSize]; 053 int sum2[] = new int[smallBinSize]; 054 Arrays.fill(minimum, Integer.MAX_VALUE); // set to upper bound for integers for collecting minima 055 double min = 1200; // this wavelength range eliminates 28 quasars after 9392 056 double max = 9392; 057 for (int n = 0; n < aNumberOfSimulations; n++) { 058 List<Line> randomLines = getRandomLines(lines, lines.size() - 28); 059 060 // produce 1 A bin size histogram of random lines 061 int histogram[] = new int[smallBinSize]; 062 for (Line line : randomLines) { 063 double wavelength = line.getWavelength(); 064 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min)); 065 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 066 } 067 /** // simpler code which makes use of collections : 068 Collection<Integer> histogram = new LinkedList<Integer>(); 069 int maximumElementValue = Integer,MAX_INT; 070 for(Line line : randomLines) { 071 int binElement = (int) (((double)smallBinSize)*(line.getWavelength()-min)/(max-min)); 072 maximumElementValue = max(maximumElementValue, binElement); 073 histogram.add(binElement); 074 } 075 for (int i = 0; i < maximumElementValue; i++) System.out.println(Collections.frequency(histogram, i)); 076 */ 077 // compute bin 4 histogram of random lines and save min, max, sum and sum2 078 int histogram4[] = new int[smallBinSize]; 079 for (int i = 0; i < smallBinSize; i++) { 080 int box = 0; 081 int j = 4; // Choose bin size 4 082 for (int k = 0; k < j; k++) { // centered sum 083 int offset = i + k - j/2; 084 if (offset >= 0 && offset < smallBinSize) box += histogram[offset]; 085 } 086 histogram4[i] = box; 087 sum[i] += box; 088 sum2[i] += box*box; 089 minimum[i] = Math.min(minimum[i], box); 090 maximum[i] = Math.max(maximum[i], box); 091 } 092 } 093 094 095 // produce 1 A bin size histogram of observed lines 096 int realHistogram[] = new int[smallBinSize]; 097 for (Line line : lines) { 098 double wavelength = line.getWavelength(); 099 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min)); 100 if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++; 101 } 102 103 // compute bin 4 histogram of observed lines 104 int realHistogram4[] = new int[smallBinSize]; 105 for (int i = 0; i < smallBinSize; i++) { 106 int box = 0; 107 int j = 4; // Choose bin size 4 108 for (int k = 0; k < j; k++) { // centered sum 109 int offset = i + k - j/2; 110 if (offset >= 0 && offset < smallBinSize) 111 box += realHistogram[offset]; 112 } 113 realHistogram4[i] = box; 114 } 115 116 117 FileWriter writer = new FileWriter("data/variance" + aNumberOfSimulations + ".txt"); 118 DecimalFormat zFormatter = new DecimalFormat("0"); 119 double average[] = new double[smallBinSize]; 120 double standarddev[]= new double[smallBinSize]; 121 for (int i = 0; i < smallBinSize; i++) { 122 average[i] = (double) sum[i] / (double) aNumberOfSimulations; 123 double s = (double) sum[i]; 124 double s2 = (double) sum2[i]; 125 double num = (double)aNumberOfSimulations; 126 127 standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0)); 128 129 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min; 130 double significance = 0.0; // clip negative deviations to zero 131 double deviation = ((double) realHistogram4[i]) - average[i]; 132 if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i]; 133 134 writer.write(zFormatter.format(wavelength) + " " + 135 formatFloatCount(average[i]) + " " + 136 formatCount(minimum[i]) + " " + 137 formatCount(maximum[i]) + " " + 138 formatFloatCount(average[i] + standarddev[i]) + " " + 139 formatFloatCount(average[i] + 2*standarddev[i]) + " " + 140 formatFloatCount(average[i] + 3*standarddev[i]) + " " + 141 formatFloatCount(average[i] + 4*standarddev[i]) + " " + 142 formatFloatCount(average[i] + 5*standarddev[i]) + " " + 143 formatCount(realHistogram4[i]) + " " + 144 formatFloatCount(significance) + " " + 145 "\n"); 146 147 } 148 writer.close(); 149 } 150 151 /** Get a sorted list of quasar emission lines in the observed frame from 152 * Hewitt and Burbidge 1993ApJS...87..451H. 153 * Note: There is a many to one correspondence between a line and a quasar 154 * (i.e. several lines map to the same quasar) 155 * 156 * @return list of quasar emission lines 157 */ 158 public static List<Line> getEmissionLines() throws java.io.IOException { 159 FileReader reader = new FileReader("data/table1noheaders.txt"); 160 BufferedReader in = new BufferedReader(reader); 161 int rowNumber = 1; // resets to one after each quasar 162 Quasar qso = new Quasar(); 163 while (true) { 164 String row = in.readLine(); 165 if (row.indexOf("ZZZ") > -1) 166 break; // last line 167 else if (row.length() == 0) { // blank line ends each quasar 168 qso = new Quasar(); 169 rowNumber = 0; 170 } else if (rowNumber == 1) { // first row 171 qso.addFirstRow(row); 172 } else if (rowNumber == 2) { // second row 173 qso.addSecondRow(row); 174 } else { 175 qso.addMoreRows(row); 176 } 177 rowNumber++; 178 } 179 reader.close(); 180 // warning: [unchecked] unchecked method invocation: <T>sort(java.util.List<T>) in java.util.Collections is applied to (java.util.List<util.Line>) 181 Collections.sort(Quasar.allLines); 182 System.out.println(Quasar.count + " quasars read"); 183 return Quasar.allLines; 184 } 185 186 /** Get statistics from a list of quasar emission lines 187 * 188 * @param lines a List of Line instances representing the quasar emssion lines 189 */ 190 public static void getStatistics(List<Line> lines, String fileName) throws java.io.IOException { 191 double min = 1200; // this wavelength range eliminates 28 quasars after 9392 192 double max = 9392; 193 int binSize = 8192; // should be named maxBinCount 194 195 // smallBinSize must produces 1 A bin size for 1200-9392 range 196 int smallBinSize = 8192; // should be named maxSmallBinCount 197 double halfBinSize = (max - min) / ((double) (2*binSize)); 198 int histogram[] = new int[smallBinSize]; 199 int histogram1[] = new int[binSize]; 200 int histogram2[] = new int[binSize]; 201 202 FileWriter writer = new FileWriter("data/" + fileName + "emissionlines.txt"); 203 for (Line line : lines) { 204 Star quasar = line.getSource(); 205 writer.write(line.toString()); 206 if (quasar != null) writer.write(" " + quasar); 207 writer.write("\n"); 208 209 double wavelength = line.getWavelength(); 210 int binElement = (int) (((double)binSize)*(wavelength -min)/(max - min)); 211 if (binElement >= 0 && binElement < binSize) histogram1[binElement]++; 212 213 // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins) 214 binElement = (int) (((double)binSize)*(wavelength-min-halfBinSize)/(max-min)); 215 if (binElement >= 0 && binElement < binSize) histogram2[binElement]++; 216 217 // produce 1 A histogram 218 binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min)); 219 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 220 } 221 writer.close(); 222 223 // Output a single histogram for the specified bin size 224 DecimalFormat zFormatter = new DecimalFormat("0"); 225 writer = new FileWriter("data/" + fileName + "histogram" + binSize + ".txt"); 226 for (int i = 0; i < binSize; i++) { 227 double wavelength = ((double)i) * (max - min) / ((double)binSize) + min; 228 writer.write(zFormatter.format(wavelength) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n"); 229 } 230 writer.close(); 231 232 // Output multiple histograms starting with bin size 1 and increasing in powers of 2 233 234 writer = new FileWriter("data/" + fileName + "histogram.txt"); 235 for (int i = 0; i < smallBinSize; i++) { 236 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min; 237 writer.write(zFormatter.format(wavelength) + " "); 238 for (int j = 1; j < 256; j = j*2) { 239 int sum = 0; 240 // centered sum 241 for (int k = 0; k < j; k++) { 242 int offset = i + k - j/2; 243 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset]; 244 } 245 writer.write(formatCount(sum) + " "); 246 // TODO: append at end of line a code for each histogram 247 // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth) 248 } 249 writer.write("\n"); 250 } 251 writer.close(); 252 } 253 254 // TODO: Fix this to allow histogram counts greater than 999 255 public static String formatCount(int aCount) { 256 String count = Integer.toString(aCount); 257 switch (count.length()) { 258 case 1: count = " " + count; 259 case 2: count = " " + count; 260 case 3: 261 } 262 return count; 263 } 264 265 static final DecimalFormat fFormatter = new DecimalFormat("0.00"); 266 public static String formatFloatCount(double aCount) { 267 String count = fFormatter.format(aCount); 268 switch (count.length()) { 269 case 4: count = " " + count; 270 } 271 return count; 272 } 273 274 /** Monte-Carlo generation of a specified number of emission lines randomly generated 275 * with the same distribution as bin 64 histogram which simulated 276 * the smoothed emission line density distribution. 277 * 278 * @param lines a List of Line instances representing peaks in the real quasar emission line data. 279 * @param number the number of random lines to generate 280 * @return a list of quasar emission lines with the same distribution as the input list 281 */ 282 283 public static List<Line> getRandomLines(List<Line> lines, int number) { 284 285 double min = 1200; // this wavelength range eliminates 28 quasars after 9392 286 double max = 9392; 287 int binSize = 8192; // should be named maxBinCount 288 289 // smallBinSize must produces 1 A bin size for 1200-9392 range 290 int smallBinSize = 8192; // should be named maxSmallBinCount 291 int histogram[] = new int[smallBinSize]; 292 int cumulative[] = new int[smallBinSize]; 293 294 // produce 1 A bin size histogram 295 for (Line line : lines) { 296 double wavelength = line.getWavelength(); 297 int binElement = (int) (((double)smallBinSize) * (wavelength - min)/(max-min)); 298 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 299 } 300 301 // Compute cumulative sum of probability distribution 302 int cumulativeSum = 0; 303 for (int i = 0; i < smallBinSize; i++) { 304 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min; 305 int sum = 0; 306 int j = 64; // Choose bin size 64 histogram as probablity distribution 307 for (int k = 0; k < j; k++) { // centered sum 308 int offset = i + k - j/2; 309 if (offset >= 0 && offset < smallBinSize) 310 sum += histogram[offset]; 311 } 312 cumulativeSum += sum; 313 cumulative[i] = cumulativeSum; 314 } 315 316 // Normalize cumulative sum : 317 318 List<Line> randomLines = new LinkedList<Line>(); 319 320 // Generate random numbers with identical line density distribution 321 for (int i = 0; i < number; i++) { 322 // 1. Generate a random integer between 0 and cumulativeSum 323 int randint = (int) (Math.random() * ((double) cumulativeSum)); 324 //System.out.println("Random number = " + randint); 325 // 2. locate wavelength bin with equal value using binary search of cumulative distribution 326 int binElement = smallBinSize / 2; // initial bin number guess 327 boolean located = false; // flag indicating that we have found the bin 328 int interval = smallBinSize / 2; 329 while (!located) { 330 interval = interval / 2; 331 int sum = cumulative[binElement]; 332 333 if (randint == sum || interval == 0) located = true; 334 else if (randint < sum) binElement = binElement - interval; 335 else if (randint > sum) binElement = binElement + interval; 336 337 //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum); 338 } 339 // convert bin number to wavelength 340 double wavelength = ((double)binElement)*(max-min) / ((double)smallBinSize) + min; 341 randomLines.add(new Line(null, wavelength)); 342 } 343 344 345 //don't sort random line list because we might want to pick a subset 346 //Collections.sort(randomLines); 347 return randomLines; 348 } 349 350 } 351