001 package util.sdss;
002
003 import java.io.*;
004 import java.text.DecimalFormat;
005 import java.util.*;
006
007 /**
008 * <p>Create a wavelength sorted list of quasar emission lines in observed
009 * reference frame from SDSS DR1.</p>
010 * By John Talbot, Nov 5, 2003
011 *
012 * <ul>
013 * <li>INPUT: dr1qso-by-plate.txt data file and one quasar FITS files per spectra</li>
014 * <li>OUTPUT:
015 * <ol>
016 * <li>list sorted by wavelength of observed frame emission lines with the
017 * quasar in which it occurs</li>
018 * <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and
019 * 128 A bin sizes.</li>
020 * <li>Histogram for one bin size and half-bin size shifted count</li>
021 * </ol>
022 * </li)
023 * </ul>
024 */
025 public class EmissionLines {
026 public static final String outputDirectory = "data/sdss/results/strongest";
027
028 public static void main(String[] args) {
029 try {
030 List<Line> lines = getEmissionLines();
031 System.out.println(lines.size() + "lines read");
032 // optional tasks :
033 getStatistics(lines, "");
034 // Generate an equal number of random lines to verify distribution
035 List<Line> randomLines = getRandomLines(lines, lines.size());
036 getStatistics(randomLines, "random");
037 int numberOfSimulations = 1000;
038 getVarianceOfRandomLines(lines, numberOfSimulations);
039 } catch (Exception e) {
040 System.out.println("Exception" + e);
041 System.exit(0);
042 }
043 }
044
045 public static void getVarianceOfRandomLines(List<Line> lines, int aNumberOfSimulations) throws java.io.IOException {
046 int smallBinSize = 8192;
047 int maximum[] = new int[smallBinSize];
048 int minimum[] = new int[smallBinSize];
049 int sum[] = new int[smallBinSize];
050 int sum2[] = new int[smallBinSize];
051 for (int i = 0; i < smallBinSize; i++) {
052 minimum[i] = Integer.MAX_VALUE; // set to upper bound for integers for collecting minima
053 }
054
055 double min = 1200; // this wavelength range eliminates 28 quasars after 9392
056 double max = 9392;
057 for (int n = 0; n < aNumberOfSimulations; n++) {
058 List<Line> randomLines = getRandomLines(lines, lines.size() - 28);
059
060 // produce 1 A bin size histogram of random lines
061 int histogram[] = new int[smallBinSize];
062 for (Iterator x = randomLines.iterator(); x.hasNext(); ) {
063 Line line = (Line) x.next();
064 double wavelength = line.getWavelength();
065 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
066 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
067 }
068
069 // compute bin 4 histogram of random lines and save min, max, sum and sum2
070 int histogram4[] = new int[smallBinSize];
071 for (int i = 0; i < smallBinSize; i++) {
072 int box = 0;
073 int j = 4; // Choose bin size 4
074 for (int k = 0; k < j; k++) { // centered sum
075 int offset = i + k - j/2;
076 if (offset >= 0 && offset < smallBinSize) box += histogram[offset];
077 }
078 histogram4[i] = box;
079 sum[i] += box;
080 sum2[i] += box*box;
081 if (box < minimum[i]) minimum[i] = box;
082 if (box > maximum[i]) maximum[i] = box;
083 }
084 }
085
086 // produce 1 A bin size histogram of observed lines
087 int realHistogram[] = new int[smallBinSize];
088 for (Line line : lines) {
089 double wavelength = line.getWavelength();
090 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
091 if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++;
092 }
093
094 // compute bin 4 histogram of observed lines
095 int realHistogram4[] = new int[smallBinSize];
096 for (int i = 0; i < smallBinSize; i++) {
097 int box = 0;
098 int j = 4; // Choose bin size 4
099 for (int k = 0; k < j; k++) { // centered sum
100 int offset = i + k - j/2;
101 if (offset >= 0 && offset < smallBinSize) box += realHistogram[offset];
102 }
103 realHistogram4[i] = box;
104 }
105
106
107 FileWriter writer = new FileWriter(outputDirectory + "/variance" + aNumberOfSimulations + ".txt");
108 DecimalFormat zFormatter = new DecimalFormat("0");
109 double average[] = new double[smallBinSize];
110 double standarddev[]= new double[smallBinSize];
111 for (int i = 0; i < smallBinSize; i++) {
112 average[i] = (double) sum[i] / (double) aNumberOfSimulations;
113 double s = (double) sum[i];
114 double s2 = (double) sum2[i];
115 double num = (double)aNumberOfSimulations;
116
117 standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0));
118
119 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
120 double significance = 0.0; // clip negative deviations to zero
121 double deviation = ((double) realHistogram4[i]) - average[i];
122 if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i];
123
124 writer.write(zFormatter.format(wavelength) + " " +
125 formatFloatCount(average[i]) + " " +
126 formatCount(minimum[i]) + " " +
127 formatCount(maximum[i]) + " " +
128 formatFloatCount(average[i] + standarddev[i]) + " " +
129 formatFloatCount(average[i] + 2*standarddev[i]) + " " +
130 formatFloatCount(average[i] + 3*standarddev[i]) + " " +
131 formatFloatCount(average[i] + 4*standarddev[i]) + " " +
132 formatFloatCount(average[i] + 5*standarddev[i]) + " " +
133 formatCount(realHistogram4[i]) + " " +
134 formatFloatCount(significance) + " " +
135 "\n");
136
137 }
138 writer.close();
139 }
140
141 /** Get a sorted list of quasar emission lines in the observed frame from
142 * SDSS spectra.
143 * Note: There is a many to one correspondence between a line and a quasar
144 * (i.e. several lines map to the same quasar)
145 *
146 * @return a list of quasar emission lines
147 */
148 public static List<Line> getEmissionLines() throws java.io.IOException {
149 List<Line> allLines = new ArrayList<Line>();
150 try {
151 DecimalFormat xFormatter = new DecimalFormat("0.000");
152 DecimalFormat yFormatter = new DecimalFormat("0.000");
153 List<Quasar> quasars = Quasar.readQuasars();
154 int numberOfQuasars = quasars.size();
155 for (int i = 0; i < numberOfQuasars; i++) {
156 Quasar qso = (Quasar) quasars.get(i);
157 System.out.println(numberOfQuasars-i);
158 //System.out.println("Quasar : " + qso);
159 List<Line> lines = qso.getEmissionLines();
160
161 Collections.sort(lines, Line.STRENGTH_COMPARATOR);
162 allLines.addAll(lines); // collect statistics on all lines
163
164 //qso.showSpectra();
165 /*
166 for (int j = 0; j < lines.size(); j++) {
167 Line line = (Line) lines.get(j);
168 System.out.println(xFormatter.format(line.getWavelength()) + "+-" +
169 xFormatter.format(line.wavelengthErr) +
170 " sigma=" +
171 xFormatter.format(line.sigma) + "+-" +
172 xFormatter.format(line.sigmaErr) +
173 " height=" +
174 xFormatter.format(line.height) + "+-" +
175 xFormatter.format(line.heightErr));
176 }
177 */
178 }
179 } catch (IOException e) {
180 System.out.println("error : " + e);
181 }
182 Collections.sort(allLines);
183 return allLines;
184 }
185
186 /** Get statistics from a list of quasar emission lines
187 *
188 * @param lines a List of Line instances representing the quasar emssion lines
189 */
190 public static void getStatistics(List<Line> lines, String fileName) throws java.io.IOException {
191 double min = 1200; // this wavelength range eliminates 28 quasars after 9392
192 double max = 9392;
193 int binSize = 8192; // should be named maxBinCount
194
195 // smallBinSize must produces 1 A bin size for 1200-9392 range
196 int smallBinSize = 8192; // should be named maxSmallBinCount
197 double halfBinSize = (max - min) / ((double) (2*binSize));
198 int histogram[] = new int[smallBinSize];
199 int histogram1[] = new int[binSize];
200 int histogram2[] = new int[binSize];
201
202 FileWriter writer = new FileWriter(outputDirectory + "/" + fileName + "emissionlines.txt");
203 for (Line line : lines) {
204 Star quasar = line.getSource();
205 writer.write(line.toString());
206 if (quasar != null) writer.write(" " + quasar);
207 writer.write("\n");
208
209 double wavelength = line.getWavelength();
210 int binElement = (int) (((double)binSize)*(wavelength-min)/(max-min));
211 if (binElement >= 0 && binElement < binSize) histogram1[binElement]++;
212
213 // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins)
214 binElement = (int) (((double)binSize)*(wavelength-min-halfBinSize)/(max-min));
215 if (binElement >= 0 && binElement < binSize) histogram2[binElement]++;
216
217 // produce 1 A histogram
218 binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
219 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
220 }
221 writer.close();
222
223 // Output a single histogram for the specified bin size
224 DecimalFormat zFormatter = new DecimalFormat("0");
225 writer = new FileWriter(outputDirectory + "/" + fileName + "histogram" + binSize + ".txt");
226 for (int i = 0; i < binSize; i++) {
227 double wavelength = ((double)i)*(max-min) / ((double)binSize) + min;
228 writer.write(zFormatter.format(wavelength) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n");
229 }
230 writer.close();
231
232 // Output multiple histograms starting with bin size 1 and increasing in powers of 2
233
234 writer = new FileWriter(outputDirectory + "/" + fileName + "histogram.txt");
235 for (int i = 0; i < smallBinSize; i++) {
236 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
237 writer.write(zFormatter.format(wavelength) + " ");
238 for (int j = 1; j < 256; j = j*2) {
239 int sum = 0;
240 // centered sum
241 for (int k = 0; k < j; k++) {
242 int offset = i + k - j/2;
243 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
244 }
245 writer.write(formatCount(sum) + " ");
246 // TODO: append at end of line a code for each histogram
247 // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth)
248 }
249 writer.write("\n");
250 }
251 writer.close();
252 }
253
254 // TODO: Fix this to allow histogram counts greater than 999
255 public static String formatCount(int aCount) {
256 String count = Integer.toString(aCount);
257 switch (count.length()) {
258 case 1: count = " " + count;
259 case 2: count = " " + count;
260 case 3:
261 }
262 return count;
263 }
264
265 static final DecimalFormat fFormatter = new DecimalFormat("0.00");
266 public static String formatFloatCount(double aCount) {
267 String count = fFormatter.format(aCount);
268 switch (count.length()) {
269 case 4: count = " " + count;
270 }
271 return count;
272 }
273
274 /** Monte-Carlo generation of a specified number of emission lines randomly generated
275 * with the same distribution as bin 64 histogram which simulated
276 * the smoothed emission line density distribution.
277 *
278 * @param lines a List of Line instances representing peaks in the real quasar emission line data.
279 * @param number the number of random lines to generate
280 * @return a list of quasar emission lines with the same distribution as the input list
281 */
282
283 public static List<Line> getRandomLines(List<Line> lines, int number) {
284
285 double min = 1200; // this wavelength range eliminates 28 quasars after 9392
286 double max = 9392;
287 int binSize = 8192; // should be named maxBinCount
288
289 // smallBinSize must produces 1 A bin size for 1200-9392 range
290 int smallBinSize = 8192; // should be named maxSmallBinCount
291 int histogram[] = new int[smallBinSize];
292 int cumulative[] = new int[smallBinSize];
293
294 // produce 1 A bin size histogram
295 for (Line line : lines) {
296 double wavelength = line.getWavelength();
297 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
298 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
299 }
300
301 // Compute cumulative sum of probability distribution
302 int cumulativeSum = 0;
303 for (int i = 0; i < smallBinSize; i++) {
304 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
305 int sum = 0;
306 int j = 64; // Choose bin size 64 histogram as probablity distribution
307 for (int k = 0; k < j; k++) { // centered sum
308 int offset = i + k - j/2;
309 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
310 }
311 cumulativeSum += sum;
312 cumulative[i] = cumulativeSum;
313 }
314
315 // Normalize cumulative sum :
316
317 List<Line> randomLines = new LinkedList<Line>();
318
319 // Generate random numbers with identical line density distribution
320 for (int i = 0; i < number; i++) {
321 // 1. Generate a random integer between 0 and cumulativeSum
322 int randint = (int) (Math.random() * ((double) cumulativeSum));
323 //System.out.println("Random number = " + randint);
324 // 2. locate wavelength bin with equal value using binary search of cumulative distribution
325 int binElement = smallBinSize / 2; // initial bin number guess
326 boolean located = false; // flag indicating that we have found the bin
327 int interval = smallBinSize / 2;
328 while (!located) {
329 interval = interval / 2;
330 int sum = cumulative[binElement];
331
332 if (randint == sum || interval == 0) located = true;
333 else if (randint < sum) binElement = binElement - interval;
334 else if (randint > sum) binElement = binElement + interval;
335
336 //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum);
337 }
338 // convert bin number to wavelength
339 double wavelength = ((double)binElement)*(max-min) / ((double)smallBinSize) + min;
340 randomLines.add(new Line(null, wavelength));
341 }
342
343
344 //don't sort random line list because we might want to pick a subset
345 //Collections.sort(randomLines);
346 return randomLines;
347 }
348
349 }
350