001 package util;
002
003 import java.io.*;
004 import java.text.DecimalFormat;
005 import java.util.*;
006 import java.lang.Math.*;
007
008 /**
009 * <p>Create a wavelength sorted list of quasar emission lines in observed
010 * reference frame from Hewitt and Burbidge 1993ApJS...87..451H .
011 * The program will extract the emission line from column 81-85 (in quasar
012 * restframe) then 'redshift' it back to the observed frame by multiplying
013 * it by 1 + z(em) where the emission redshift is from columns 68-73 (ends
014 * with first non-numeric char or whitespace).</p>
015 * By John Talbot, Sep 16, 2003
016 *
017 * <ul>
018 * <li>INPUT: table1.txt data file from 1993ApJS...87..451H, with page headers
019 * removed and ZZZ as the last line</li>
020 * <li>OUTPUT:
021 * <ol>
022 * <li>list sorted by wavelength of observed frame emission lines with the
023 * quasar in which it occurs</li>
024 * <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and
025 * 128 A bin sizes.</li>
026 * <li>Histogram for one bin size and half-bin size shifted count</li>
027 * </ol>
028 * </li)
029 * </ul>
030 */
031 public class EmissionLines {
032 public static void main(String[] args) {
033 try {
034 List<Line> lines = getEmissionLines();
035 // optional tasks :
036 getStatistics(lines, "");
037 // Generate an equal number of random lines to verify distribution
038 List<Line> randomLines = getRandomLines(lines, lines.size() - 28);
039 getStatistics(randomLines, "random");
040 int numberOfSimulations = 1000;
041 getVarianceOfRandomLines(lines, numberOfSimulations);
042 } catch (Exception e) {
043 System.out.println("Exception" + e);
044 System.exit(0);
045 }
046 }
047
048 public static void getVarianceOfRandomLines(List<Line> lines, int aNumberOfSimulations) throws java.io.IOException {
049 int smallBinSize = 8192;
050 int maximum[] = new int[smallBinSize];
051 int minimum[] = new int[smallBinSize];
052 int sum[] = new int[smallBinSize];
053 int sum2[] = new int[smallBinSize];
054 Arrays.fill(minimum, Integer.MAX_VALUE); // set to upper bound for integers for collecting minima
055 double min = 1200; // this wavelength range eliminates 28 quasars after 9392
056 double max = 9392;
057 for (int n = 0; n < aNumberOfSimulations; n++) {
058 List<Line> randomLines = getRandomLines(lines, lines.size() - 28);
059
060 // produce 1 A bin size histogram of random lines
061 int histogram[] = new int[smallBinSize];
062 for (Line line : randomLines) {
063 double wavelength = line.getWavelength();
064 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
065 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
066 }
067 /** // simpler code which makes use of collections :
068 Collection<Integer> histogram = new LinkedList<Integer>();
069 int maximumElementValue = Integer,MAX_INT;
070 for(Line line : randomLines) {
071 int binElement = (int) (((double)smallBinSize)*(line.getWavelength()-min)/(max-min));
072 maximumElementValue = max(maximumElementValue, binElement);
073 histogram.add(binElement);
074 }
075 for (int i = 0; i < maximumElementValue; i++) System.out.println(Collections.frequency(histogram, i));
076 */
077 // compute bin 4 histogram of random lines and save min, max, sum and sum2
078 int histogram4[] = new int[smallBinSize];
079 for (int i = 0; i < smallBinSize; i++) {
080 int box = 0;
081 int j = 4; // Choose bin size 4
082 for (int k = 0; k < j; k++) { // centered sum
083 int offset = i + k - j/2;
084 if (offset >= 0 && offset < smallBinSize) box += histogram[offset];
085 }
086 histogram4[i] = box;
087 sum[i] += box;
088 sum2[i] += box*box;
089 minimum[i] = Math.min(minimum[i], box);
090 maximum[i] = Math.max(maximum[i], box);
091 }
092 }
093
094
095 // produce 1 A bin size histogram of observed lines
096 int realHistogram[] = new int[smallBinSize];
097 for (Line line : lines) {
098 double wavelength = line.getWavelength();
099 int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
100 if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++;
101 }
102
103 // compute bin 4 histogram of observed lines
104 int realHistogram4[] = new int[smallBinSize];
105 for (int i = 0; i < smallBinSize; i++) {
106 int box = 0;
107 int j = 4; // Choose bin size 4
108 for (int k = 0; k < j; k++) { // centered sum
109 int offset = i + k - j/2;
110 if (offset >= 0 && offset < smallBinSize)
111 box += realHistogram[offset];
112 }
113 realHistogram4[i] = box;
114 }
115
116
117 FileWriter writer = new FileWriter("data/variance" + aNumberOfSimulations + ".txt");
118 DecimalFormat zFormatter = new DecimalFormat("0");
119 double average[] = new double[smallBinSize];
120 double standarddev[]= new double[smallBinSize];
121 for (int i = 0; i < smallBinSize; i++) {
122 average[i] = (double) sum[i] / (double) aNumberOfSimulations;
123 double s = (double) sum[i];
124 double s2 = (double) sum2[i];
125 double num = (double)aNumberOfSimulations;
126
127 standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0));
128
129 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
130 double significance = 0.0; // clip negative deviations to zero
131 double deviation = ((double) realHistogram4[i]) - average[i];
132 if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i];
133
134 writer.write(zFormatter.format(wavelength) + " " +
135 formatFloatCount(average[i]) + " " +
136 formatCount(minimum[i]) + " " +
137 formatCount(maximum[i]) + " " +
138 formatFloatCount(average[i] + standarddev[i]) + " " +
139 formatFloatCount(average[i] + 2*standarddev[i]) + " " +
140 formatFloatCount(average[i] + 3*standarddev[i]) + " " +
141 formatFloatCount(average[i] + 4*standarddev[i]) + " " +
142 formatFloatCount(average[i] + 5*standarddev[i]) + " " +
143 formatCount(realHistogram4[i]) + " " +
144 formatFloatCount(significance) + " " +
145 "\n");
146
147 }
148 writer.close();
149 }
150
151 /** Get a sorted list of quasar emission lines in the observed frame from
152 * Hewitt and Burbidge 1993ApJS...87..451H.
153 * Note: There is a many to one correspondence between a line and a quasar
154 * (i.e. several lines map to the same quasar)
155 *
156 * @return list of quasar emission lines
157 */
158 public static List<Line> getEmissionLines() throws java.io.IOException {
159 FileReader reader = new FileReader("data/table1noheaders.txt");
160 BufferedReader in = new BufferedReader(reader);
161 int rowNumber = 1; // resets to one after each quasar
162 Quasar qso = new Quasar();
163 while (true) {
164 String row = in.readLine();
165 if (row.indexOf("ZZZ") > -1)
166 break; // last line
167 else if (row.length() == 0) { // blank line ends each quasar
168 qso = new Quasar();
169 rowNumber = 0;
170 } else if (rowNumber == 1) { // first row
171 qso.addFirstRow(row);
172 } else if (rowNumber == 2) { // second row
173 qso.addSecondRow(row);
174 } else {
175 qso.addMoreRows(row);
176 }
177 rowNumber++;
178 }
179 reader.close();
180 // warning: [unchecked] unchecked method invocation: <T>sort(java.util.List<T>) in java.util.Collections is applied to (java.util.List<util.Line>)
181 Collections.sort(Quasar.allLines);
182 System.out.println(Quasar.count + " quasars read");
183 return Quasar.allLines;
184 }
185
186 /** Get statistics from a list of quasar emission lines
187 *
188 * @param lines a List of Line instances representing the quasar emssion lines
189 */
190 public static void getStatistics(List<Line> lines, String fileName) throws java.io.IOException {
191 double min = 1200; // this wavelength range eliminates 28 quasars after 9392
192 double max = 9392;
193 int binSize = 8192; // should be named maxBinCount
194
195 // smallBinSize must produces 1 A bin size for 1200-9392 range
196 int smallBinSize = 8192; // should be named maxSmallBinCount
197 double halfBinSize = (max - min) / ((double) (2*binSize));
198 int histogram[] = new int[smallBinSize];
199 int histogram1[] = new int[binSize];
200 int histogram2[] = new int[binSize];
201
202 FileWriter writer = new FileWriter("data/" + fileName + "emissionlines.txt");
203 for (Line line : lines) {
204 Star quasar = line.getSource();
205 writer.write(line.toString());
206 if (quasar != null) writer.write(" " + quasar);
207 writer.write("\n");
208
209 double wavelength = line.getWavelength();
210 int binElement = (int) (((double)binSize)*(wavelength -min)/(max - min));
211 if (binElement >= 0 && binElement < binSize) histogram1[binElement]++;
212
213 // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins)
214 binElement = (int) (((double)binSize)*(wavelength-min-halfBinSize)/(max-min));
215 if (binElement >= 0 && binElement < binSize) histogram2[binElement]++;
216
217 // produce 1 A histogram
218 binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
219 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
220 }
221 writer.close();
222
223 // Output a single histogram for the specified bin size
224 DecimalFormat zFormatter = new DecimalFormat("0");
225 writer = new FileWriter("data/" + fileName + "histogram" + binSize + ".txt");
226 for (int i = 0; i < binSize; i++) {
227 double wavelength = ((double)i) * (max - min) / ((double)binSize) + min;
228 writer.write(zFormatter.format(wavelength) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n");
229 }
230 writer.close();
231
232 // Output multiple histograms starting with bin size 1 and increasing in powers of 2
233
234 writer = new FileWriter("data/" + fileName + "histogram.txt");
235 for (int i = 0; i < smallBinSize; i++) {
236 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
237 writer.write(zFormatter.format(wavelength) + " ");
238 for (int j = 1; j < 256; j = j*2) {
239 int sum = 0;
240 // centered sum
241 for (int k = 0; k < j; k++) {
242 int offset = i + k - j/2;
243 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
244 }
245 writer.write(formatCount(sum) + " ");
246 // TODO: append at end of line a code for each histogram
247 // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth)
248 }
249 writer.write("\n");
250 }
251 writer.close();
252 }
253
254 // TODO: Fix this to allow histogram counts greater than 999
255 public static String formatCount(int aCount) {
256 String count = Integer.toString(aCount);
257 switch (count.length()) {
258 case 1: count = " " + count;
259 case 2: count = " " + count;
260 case 3:
261 }
262 return count;
263 }
264
265 static final DecimalFormat fFormatter = new DecimalFormat("0.00");
266 public static String formatFloatCount(double aCount) {
267 String count = fFormatter.format(aCount);
268 switch (count.length()) {
269 case 4: count = " " + count;
270 }
271 return count;
272 }
273
274 /** Monte-Carlo generation of a specified number of emission lines randomly generated
275 * with the same distribution as bin 64 histogram which simulated
276 * the smoothed emission line density distribution.
277 *
278 * @param lines a List of Line instances representing peaks in the real quasar emission line data.
279 * @param number the number of random lines to generate
280 * @return a list of quasar emission lines with the same distribution as the input list
281 */
282
283 public static List<Line> getRandomLines(List<Line> lines, int number) {
284
285 double min = 1200; // this wavelength range eliminates 28 quasars after 9392
286 double max = 9392;
287 int binSize = 8192; // should be named maxBinCount
288
289 // smallBinSize must produces 1 A bin size for 1200-9392 range
290 int smallBinSize = 8192; // should be named maxSmallBinCount
291 int histogram[] = new int[smallBinSize];
292 int cumulative[] = new int[smallBinSize];
293
294 // produce 1 A bin size histogram
295 for (Line line : lines) {
296 double wavelength = line.getWavelength();
297 int binElement = (int) (((double)smallBinSize) * (wavelength - min)/(max-min));
298 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
299 }
300
301 // Compute cumulative sum of probability distribution
302 int cumulativeSum = 0;
303 for (int i = 0; i < smallBinSize; i++) {
304 double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
305 int sum = 0;
306 int j = 64; // Choose bin size 64 histogram as probablity distribution
307 for (int k = 0; k < j; k++) { // centered sum
308 int offset = i + k - j/2;
309 if (offset >= 0 && offset < smallBinSize)
310 sum += histogram[offset];
311 }
312 cumulativeSum += sum;
313 cumulative[i] = cumulativeSum;
314 }
315
316 // Normalize cumulative sum :
317
318 List<Line> randomLines = new LinkedList<Line>();
319
320 // Generate random numbers with identical line density distribution
321 for (int i = 0; i < number; i++) {
322 // 1. Generate a random integer between 0 and cumulativeSum
323 int randint = (int) (Math.random() * ((double) cumulativeSum));
324 //System.out.println("Random number = " + randint);
325 // 2. locate wavelength bin with equal value using binary search of cumulative distribution
326 int binElement = smallBinSize / 2; // initial bin number guess
327 boolean located = false; // flag indicating that we have found the bin
328 int interval = smallBinSize / 2;
329 while (!located) {
330 interval = interval / 2;
331 int sum = cumulative[binElement];
332
333 if (randint == sum || interval == 0) located = true;
334 else if (randint < sum) binElement = binElement - interval;
335 else if (randint > sum) binElement = binElement + interval;
336
337 //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum);
338 }
339 // convert bin number to wavelength
340 double wavelength = ((double)binElement)*(max-min) / ((double)smallBinSize) + min;
341 randomLines.add(new Line(null, wavelength));
342 }
343
344
345 //don't sort random line list because we might want to pick a subset
346 //Collections.sort(randomLines);
347 return randomLines;
348 }
349
350 }
351