001 package util.sdss; 002 003 import java.io.*; 004 import java.text.DecimalFormat; 005 import java.util.*; 006 007 /** 008 * <p>Create a wavelength sorted list of quasar emission lines in observed 009 * reference frame from SDSS DR1.</p> 010 * By John Talbot, Nov 5, 2003 011 * 012 * <ul> 013 * <li>INPUT: dr1qso-by-plate.txt data file and one quasar FITS files per spectra</li> 014 * <li>OUTPUT: 015 * <ol> 016 * <li>list sorted by wavelength of observed frame emission lines with the 017 * quasar in which it occurs</li> 018 * <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and 019 * 128 A bin sizes.</li> 020 * <li>Histogram for one bin size and half-bin size shifted count</li> 021 * </ol> 022 * </li) 023 * </ul> 024 */ 025 public class EmissionLines { 026 public static final String outputDirectory = "data/sdss/results/strongest"; 027 028 public static void main(String[] args) { 029 try { 030 List<Line> lines = getEmissionLines(); 031 System.out.println(lines.size() + "lines read"); 032 // optional tasks : 033 getStatistics(lines, ""); 034 // Generate an equal number of random lines to verify distribution 035 List<Line> randomLines = getRandomLines(lines, lines.size()); 036 getStatistics(randomLines, "random"); 037 int numberOfSimulations = 1000; 038 getVarianceOfRandomLines(lines, numberOfSimulations); 039 } catch (Exception e) { 040 System.out.println("Exception" + e); 041 System.exit(0); 042 } 043 } 044 045 public static void getVarianceOfRandomLines(List<Line> lines, int aNumberOfSimulations) throws java.io.IOException { 046 int smallBinSize = 8192; 047 int maximum[] = new int[smallBinSize]; 048 int minimum[] = new int[smallBinSize]; 049 int sum[] = new int[smallBinSize]; 050 int sum2[] = new int[smallBinSize]; 051 for (int i = 0; i < smallBinSize; i++) { 052 minimum[i] = Integer.MAX_VALUE; // set to upper bound for integers for collecting minima 053 } 054 055 double min = 1200; // this wavelength range eliminates 28 quasars after 9392 056 double max = 9392; 057 for (int n = 0; n < aNumberOfSimulations; n++) { 058 List<Line> randomLines = getRandomLines(lines, lines.size() - 28); 059 060 // produce 1 A bin size histogram of random lines 061 int histogram[] = new int[smallBinSize]; 062 for (Iterator x = randomLines.iterator(); x.hasNext(); ) { 063 Line line = (Line) x.next(); 064 double wavelength = line.getWavelength(); 065 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min)); 066 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 067 } 068 069 // compute bin 4 histogram of random lines and save min, max, sum and sum2 070 int histogram4[] = new int[smallBinSize]; 071 for (int i = 0; i < smallBinSize; i++) { 072 int box = 0; 073 int j = 4; // Choose bin size 4 074 for (int k = 0; k < j; k++) { // centered sum 075 int offset = i + k - j/2; 076 if (offset >= 0 && offset < smallBinSize) box += histogram[offset]; 077 } 078 histogram4[i] = box; 079 sum[i] += box; 080 sum2[i] += box*box; 081 if (box < minimum[i]) minimum[i] = box; 082 if (box > maximum[i]) maximum[i] = box; 083 } 084 } 085 086 // produce 1 A bin size histogram of observed lines 087 int realHistogram[] = new int[smallBinSize]; 088 for (Line line : lines) { 089 double wavelength = line.getWavelength(); 090 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min)); 091 if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++; 092 } 093 094 // compute bin 4 histogram of observed lines 095 int realHistogram4[] = new int[smallBinSize]; 096 for (int i = 0; i < smallBinSize; i++) { 097 int box = 0; 098 int j = 4; // Choose bin size 4 099 for (int k = 0; k < j; k++) { // centered sum 100 int offset = i + k - j/2; 101 if (offset >= 0 && offset < smallBinSize) box += realHistogram[offset]; 102 } 103 realHistogram4[i] = box; 104 } 105 106 107 FileWriter writer = new FileWriter(outputDirectory + "/variance" + aNumberOfSimulations + ".txt"); 108 DecimalFormat zFormatter = new DecimalFormat("0"); 109 double average[] = new double[smallBinSize]; 110 double standarddev[]= new double[smallBinSize]; 111 for (int i = 0; i < smallBinSize; i++) { 112 average[i] = (double) sum[i] / (double) aNumberOfSimulations; 113 double s = (double) sum[i]; 114 double s2 = (double) sum2[i]; 115 double num = (double)aNumberOfSimulations; 116 117 standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0)); 118 119 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min; 120 double significance = 0.0; // clip negative deviations to zero 121 double deviation = ((double) realHistogram4[i]) - average[i]; 122 if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i]; 123 124 writer.write(zFormatter.format(wavelength) + " " + 125 formatFloatCount(average[i]) + " " + 126 formatCount(minimum[i]) + " " + 127 formatCount(maximum[i]) + " " + 128 formatFloatCount(average[i] + standarddev[i]) + " " + 129 formatFloatCount(average[i] + 2*standarddev[i]) + " " + 130 formatFloatCount(average[i] + 3*standarddev[i]) + " " + 131 formatFloatCount(average[i] + 4*standarddev[i]) + " " + 132 formatFloatCount(average[i] + 5*standarddev[i]) + " " + 133 formatCount(realHistogram4[i]) + " " + 134 formatFloatCount(significance) + " " + 135 "\n"); 136 137 } 138 writer.close(); 139 } 140 141 /** Get a sorted list of quasar emission lines in the observed frame from 142 * SDSS spectra. 143 * Note: There is a many to one correspondence between a line and a quasar 144 * (i.e. several lines map to the same quasar) 145 * 146 * @return a list of quasar emission lines 147 */ 148 public static List<Line> getEmissionLines() throws java.io.IOException { 149 List<Line> allLines = new ArrayList<Line>(); 150 try { 151 DecimalFormat xFormatter = new DecimalFormat("0.000"); 152 DecimalFormat yFormatter = new DecimalFormat("0.000"); 153 List<Quasar> quasars = Quasar.readQuasars(); 154 int numberOfQuasars = quasars.size(); 155 for (int i = 0; i < numberOfQuasars; i++) { 156 Quasar qso = (Quasar) quasars.get(i); 157 System.out.println(numberOfQuasars-i); 158 //System.out.println("Quasar : " + qso); 159 List<Line> lines = qso.getEmissionLines(); 160 161 Collections.sort(lines, Line.STRENGTH_COMPARATOR); 162 allLines.addAll(lines); // collect statistics on all lines 163 164 //qso.showSpectra(); 165 /* 166 for (int j = 0; j < lines.size(); j++) { 167 Line line = (Line) lines.get(j); 168 System.out.println(xFormatter.format(line.getWavelength()) + "+-" + 169 xFormatter.format(line.wavelengthErr) + 170 " sigma=" + 171 xFormatter.format(line.sigma) + "+-" + 172 xFormatter.format(line.sigmaErr) + 173 " height=" + 174 xFormatter.format(line.height) + "+-" + 175 xFormatter.format(line.heightErr)); 176 } 177 */ 178 } 179 } catch (IOException e) { 180 System.out.println("error : " + e); 181 } 182 Collections.sort(allLines); 183 return allLines; 184 } 185 186 /** Get statistics from a list of quasar emission lines 187 * 188 * @param lines a List of Line instances representing the quasar emssion lines 189 */ 190 public static void getStatistics(List<Line> lines, String fileName) throws java.io.IOException { 191 double min = 1200; // this wavelength range eliminates 28 quasars after 9392 192 double max = 9392; 193 int binSize = 8192; // should be named maxBinCount 194 195 // smallBinSize must produces 1 A bin size for 1200-9392 range 196 int smallBinSize = 8192; // should be named maxSmallBinCount 197 double halfBinSize = (max - min) / ((double) (2*binSize)); 198 int histogram[] = new int[smallBinSize]; 199 int histogram1[] = new int[binSize]; 200 int histogram2[] = new int[binSize]; 201 202 FileWriter writer = new FileWriter(outputDirectory + "/" + fileName + "emissionlines.txt"); 203 for (Line line : lines) { 204 Star quasar = line.getSource(); 205 writer.write(line.toString()); 206 if (quasar != null) writer.write(" " + quasar); 207 writer.write("\n"); 208 209 double wavelength = line.getWavelength(); 210 int binElement = (int) (((double)binSize)*(wavelength-min)/(max-min)); 211 if (binElement >= 0 && binElement < binSize) histogram1[binElement]++; 212 213 // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins) 214 binElement = (int) (((double)binSize)*(wavelength-min-halfBinSize)/(max-min)); 215 if (binElement >= 0 && binElement < binSize) histogram2[binElement]++; 216 217 // produce 1 A histogram 218 binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min)); 219 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 220 } 221 writer.close(); 222 223 // Output a single histogram for the specified bin size 224 DecimalFormat zFormatter = new DecimalFormat("0"); 225 writer = new FileWriter(outputDirectory + "/" + fileName + "histogram" + binSize + ".txt"); 226 for (int i = 0; i < binSize; i++) { 227 double wavelength = ((double)i)*(max-min) / ((double)binSize) + min; 228 writer.write(zFormatter.format(wavelength) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n"); 229 } 230 writer.close(); 231 232 // Output multiple histograms starting with bin size 1 and increasing in powers of 2 233 234 writer = new FileWriter(outputDirectory + "/" + fileName + "histogram.txt"); 235 for (int i = 0; i < smallBinSize; i++) { 236 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min; 237 writer.write(zFormatter.format(wavelength) + " "); 238 for (int j = 1; j < 256; j = j*2) { 239 int sum = 0; 240 // centered sum 241 for (int k = 0; k < j; k++) { 242 int offset = i + k - j/2; 243 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset]; 244 } 245 writer.write(formatCount(sum) + " "); 246 // TODO: append at end of line a code for each histogram 247 // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth) 248 } 249 writer.write("\n"); 250 } 251 writer.close(); 252 } 253 254 // TODO: Fix this to allow histogram counts greater than 999 255 public static String formatCount(int aCount) { 256 String count = Integer.toString(aCount); 257 switch (count.length()) { 258 case 1: count = " " + count; 259 case 2: count = " " + count; 260 case 3: 261 } 262 return count; 263 } 264 265 static final DecimalFormat fFormatter = new DecimalFormat("0.00"); 266 public static String formatFloatCount(double aCount) { 267 String count = fFormatter.format(aCount); 268 switch (count.length()) { 269 case 4: count = " " + count; 270 } 271 return count; 272 } 273 274 /** Monte-Carlo generation of a specified number of emission lines randomly generated 275 * with the same distribution as bin 64 histogram which simulated 276 * the smoothed emission line density distribution. 277 * 278 * @param lines a List of Line instances representing peaks in the real quasar emission line data. 279 * @param number the number of random lines to generate 280 * @return a list of quasar emission lines with the same distribution as the input list 281 */ 282 283 public static List<Line> getRandomLines(List<Line> lines, int number) { 284 285 double min = 1200; // this wavelength range eliminates 28 quasars after 9392 286 double max = 9392; 287 int binSize = 8192; // should be named maxBinCount 288 289 // smallBinSize must produces 1 A bin size for 1200-9392 range 290 int smallBinSize = 8192; // should be named maxSmallBinCount 291 int histogram[] = new int[smallBinSize]; 292 int cumulative[] = new int[smallBinSize]; 293 294 // produce 1 A bin size histogram 295 for (Line line : lines) { 296 double wavelength = line.getWavelength(); 297 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min)); 298 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++; 299 } 300 301 // Compute cumulative sum of probability distribution 302 int cumulativeSum = 0; 303 for (int i = 0; i < smallBinSize; i++) { 304 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min; 305 int sum = 0; 306 int j = 64; // Choose bin size 64 histogram as probablity distribution 307 for (int k = 0; k < j; k++) { // centered sum 308 int offset = i + k - j/2; 309 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset]; 310 } 311 cumulativeSum += sum; 312 cumulative[i] = cumulativeSum; 313 } 314 315 // Normalize cumulative sum : 316 317 List<Line> randomLines = new LinkedList<Line>(); 318 319 // Generate random numbers with identical line density distribution 320 for (int i = 0; i < number; i++) { 321 // 1. Generate a random integer between 0 and cumulativeSum 322 int randint = (int) (Math.random() * ((double) cumulativeSum)); 323 //System.out.println("Random number = " + randint); 324 // 2. locate wavelength bin with equal value using binary search of cumulative distribution 325 int binElement = smallBinSize / 2; // initial bin number guess 326 boolean located = false; // flag indicating that we have found the bin 327 int interval = smallBinSize / 2; 328 while (!located) { 329 interval = interval / 2; 330 int sum = cumulative[binElement]; 331 332 if (randint == sum || interval == 0) located = true; 333 else if (randint < sum) binElement = binElement - interval; 334 else if (randint > sum) binElement = binElement + interval; 335 336 //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum); 337 } 338 // convert bin number to wavelength 339 double wavelength = ((double)binElement)*(max-min) / ((double)smallBinSize) + min; 340 randomLines.add(new Line(null, wavelength)); 341 } 342 343 344 //don't sort random line list because we might want to pick a subset 345 //Collections.sort(randomLines); 346 return randomLines; 347 } 348 349 } 350