001    package util.sdss;
002    
003    import java.io.*;
004    import java.text.DecimalFormat;
005    import java.util.*;
006    
007    /**
008     * <p>Create a wavelength sorted list of quasar emission lines in observed
009     * reference frame from SDSS DR1.</p>
010     * By John Talbot, Nov 5, 2003
011     *
012     * <ul>
013     * <li>INPUT: dr1qso-by-plate.txt data file and one quasar FITS files per spectra</li>
014     * <li>OUTPUT:
015     *    <ol>
016     *    <li>list sorted by wavelength of observed frame emission lines with the
017     *        quasar in which it occurs</li>
018     *    <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and
019     *        128 A bin sizes.</li>
020     *    <li>Histogram for one bin size and half-bin size shifted count</li>
021     *    </ol>
022     * </li)
023     * </ul>
024     */
025    public class EmissionLines {
026        public static final String outputDirectory = "data/sdss/results/strongest";
027        
028        public static void main(String[] args) {
029            try {
030              List<Line> lines = getEmissionLines();
031              System.out.println(lines.size() + "lines read");
032              // optional tasks :
033              getStatistics(lines, "");
034              // Generate an equal number of random lines to verify distribution
035              List<Line> randomLines = getRandomLines(lines, lines.size());
036              getStatistics(randomLines, "random");
037              int numberOfSimulations = 1000;
038              getVarianceOfRandomLines(lines, numberOfSimulations);
039            } catch (Exception e) {
040                System.out.println("Exception" + e);
041                System.exit(0);
042            }
043        }
044    
045        public static void getVarianceOfRandomLines(List<Line> lines, int aNumberOfSimulations) throws java.io.IOException {
046            int smallBinSize = 8192;
047            int maximum[] = new int[smallBinSize];
048            int minimum[] = new int[smallBinSize];
049            int sum[]     = new int[smallBinSize];
050            int sum2[]    = new int[smallBinSize];
051            for (int i = 0; i < smallBinSize; i++) {
052                minimum[i] = Integer.MAX_VALUE;      // set to upper bound for integers for collecting minima
053            }
054    
055            double min = 1200;   // this wavelength range eliminates 28 quasars after 9392
056            double max = 9392;
057            for (int n = 0; n < aNumberOfSimulations; n++) {
058                List<Line> randomLines = getRandomLines(lines, lines.size() - 28);
059     
060                // produce 1 A bin size histogram of random lines
061                int histogram[] = new int[smallBinSize];
062                for (Iterator x = randomLines.iterator(); x.hasNext(); ) {
063                    Line line = (Line) x.next();
064                    double wavelength = line.getWavelength();
065                    int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
066                    if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
067                }
068    
069                // compute bin 4 histogram of random lines and save min, max, sum and sum2            
070                int histogram4[] = new int[smallBinSize];
071                for (int i = 0; i < smallBinSize; i++) {
072                    int box = 0;
073                    int j = 4;  // Choose bin size 4
074                    for (int k = 0; k < j; k++) {   // centered sum
075                        int offset = i + k - j/2;
076                        if (offset >= 0 && offset < smallBinSize) box += histogram[offset];
077                    }
078                    histogram4[i] = box;
079                    sum[i] += box;
080                    sum2[i] += box*box;
081                    if (box < minimum[i]) minimum[i] = box;
082                    if (box > maximum[i]) maximum[i] = box;
083                }
084            }
085    
086            // produce 1 A bin size histogram of observed lines
087            int realHistogram[] = new int[smallBinSize];
088            for (Line line : lines) {
089                double wavelength = line.getWavelength();
090                int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
091                if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++;
092            }
093    
094            // compute bin 4 histogram of observed lines 
095            int realHistogram4[] = new int[smallBinSize];
096            for (int i = 0; i < smallBinSize; i++) {
097                int box = 0;
098                int j = 4;  // Choose bin size 4
099                for (int k = 0; k < j; k++) {   // centered sum
100                    int offset = i + k - j/2;
101                    if (offset >= 0 && offset < smallBinSize) box += realHistogram[offset];
102                }
103                realHistogram4[i] = box;
104            }
105    
106    
107            FileWriter writer = new FileWriter(outputDirectory + "/variance" + aNumberOfSimulations + ".txt");
108            DecimalFormat zFormatter = new DecimalFormat("0");
109            double average[] = new double[smallBinSize];
110            double standarddev[]= new double[smallBinSize];
111            for (int i = 0; i < smallBinSize; i++) {
112                average[i] = (double) sum[i] / (double) aNumberOfSimulations;
113                double s  = (double) sum[i];
114                double s2 = (double) sum2[i];
115                double num = (double)aNumberOfSimulations;
116                
117                standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0));
118    
119                double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
120                double significance = 0.0;    // clip negative deviations to zero
121                double deviation = ((double) realHistogram4[i]) - average[i];
122                if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i];   
123                
124                writer.write(zFormatter.format(wavelength) + " " + 
125                             formatFloatCount(average[i]) + " " + 
126                             formatCount(minimum[i])  + " " + 
127                             formatCount(maximum[i])  + " " + 
128                             formatFloatCount(average[i] + standarddev[i])  + " " +                       
129                             formatFloatCount(average[i] + 2*standarddev[i])  + " " +
130                             formatFloatCount(average[i] + 3*standarddev[i])  + " " +                       
131                             formatFloatCount(average[i] + 4*standarddev[i])  + " " +
132                             formatFloatCount(average[i] + 5*standarddev[i])  + " " +
133                             formatCount(realHistogram4[i])  + " " +
134                             formatFloatCount(significance)  + " " +
135                             "\n");
136                
137            }
138            writer.close();
139        }
140    
141        /** Get a sorted list of quasar emission lines in the observed frame from
142         * SDSS spectra.
143         * Note: There is a many to one correspondence between a line and a quasar
144         * (i.e. several lines map to the same quasar)
145         *
146         * @return a list of quasar emission lines
147         */
148        public static List<Line> getEmissionLines() throws java.io.IOException {
149            List<Line> allLines = new ArrayList<Line>();
150            try {
151              DecimalFormat xFormatter = new DecimalFormat("0.000");
152              DecimalFormat yFormatter = new DecimalFormat("0.000");
153              List<Quasar> quasars = Quasar.readQuasars();
154              int numberOfQuasars = quasars.size();
155              for (int i = 0; i < numberOfQuasars; i++) {
156                Quasar qso = (Quasar) quasars.get(i);
157                System.out.println(numberOfQuasars-i);
158                //System.out.println("Quasar : " + qso);
159                List<Line> lines = qso.getEmissionLines();
160                
161                Collections.sort(lines, Line.STRENGTH_COMPARATOR);
162                allLines.addAll(lines);  // collect statistics on all lines
163    
164                //qso.showSpectra();
165    /*
166                for (int j = 0; j < lines.size(); j++) {
167                    Line line = (Line) lines.get(j);
168                    System.out.println(xFormatter.format(line.getWavelength()) + "+-" + 
169                                       xFormatter.format(line.wavelengthErr) + 
170                                       " sigma=" + 
171                                       xFormatter.format(line.sigma) + "+-" +
172                                       xFormatter.format(line.sigmaErr) +
173                                       " height=" +
174                                       xFormatter.format(line.height) + "+-" +
175                                       xFormatter.format(line.heightErr));
176                }
177    */
178              }
179            } catch (IOException e) {
180                System.out.println("error : " + e);
181            }
182            Collections.sort(allLines);
183            return allLines;
184        }
185    
186        /** Get statistics from a list of quasar emission lines
187         *
188         * @param lines a List of Line instances representing the quasar emssion lines
189         */
190        public static void getStatistics(List<Line> lines, String fileName) throws java.io.IOException {
191            double min = 1200;   // this wavelength range eliminates 28 quasars after 9392
192            double max = 9392;
193            int binSize = 8192;  // should be named maxBinCount
194    
195            // smallBinSize must produces 1 A bin size for 1200-9392 range
196            int smallBinSize = 8192;  // should be named maxSmallBinCount
197            double halfBinSize = (max - min) / ((double) (2*binSize));
198            int histogram[] = new int[smallBinSize];
199            int histogram1[] = new int[binSize];
200            int histogram2[] = new int[binSize];
201    
202            FileWriter writer = new FileWriter(outputDirectory + "/" + fileName + "emissionlines.txt");
203            for (Line line : lines) {
204                Star quasar =  line.getSource();
205                writer.write(line.toString());
206                if (quasar != null) writer.write(" " + quasar);
207                writer.write("\n");
208    
209                double wavelength = line.getWavelength();
210                int binElement = (int) (((double)binSize)*(wavelength-min)/(max-min));
211                if (binElement >= 0 && binElement < binSize) histogram1[binElement]++;
212    
213                // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins)
214                binElement = (int) (((double)binSize)*(wavelength-min-halfBinSize)/(max-min));
215                if (binElement >= 0 && binElement < binSize) histogram2[binElement]++;
216    
217                // produce 1 A histogram
218                binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
219                if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
220            }
221            writer.close();
222    
223            // Output a single histogram for the specified bin size
224            DecimalFormat zFormatter = new DecimalFormat("0");
225            writer = new FileWriter(outputDirectory + "/" + fileName + "histogram" + binSize + ".txt");
226            for (int i = 0; i < binSize; i++) {
227                double wavelength = ((double)i)*(max-min) / ((double)binSize) + min;
228                writer.write(zFormatter.format(wavelength) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n");
229            }
230            writer.close();
231    
232            // Output multiple histograms starting with bin size 1 and increasing in powers of 2
233    
234            writer = new FileWriter(outputDirectory + "/" + fileName + "histogram.txt");
235            for (int i = 0; i < smallBinSize; i++) {
236                double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
237                writer.write(zFormatter.format(wavelength) + " ");
238                for (int j = 1; j < 256; j = j*2) {
239                    int sum = 0;
240                    // centered sum
241                    for (int k = 0; k < j; k++) {
242                        int offset = i + k - j/2;
243                        if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
244                    }
245                    writer.write(formatCount(sum) + " ");
246                    // TODO: append at end of line a code for each histogram
247                    // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth)
248                }
249                writer.write("\n");
250            }
251            writer.close();
252        }
253    
254        // TODO: Fix this to allow histogram counts greater than 999
255        public static String formatCount(int aCount) {
256            String count = Integer.toString(aCount);
257            switch (count.length()) {
258                case 1: count = " " + count;
259                case 2: count = " " + count;
260                case 3:
261            }
262            return count;
263        }
264    
265        static final DecimalFormat fFormatter = new DecimalFormat("0.00");
266        public static String formatFloatCount(double aCount) {
267            String count = fFormatter.format(aCount);
268            switch (count.length()) {
269                case 4: count = " " + count;
270            }
271            return count;
272        }
273    
274        /** Monte-Carlo generation of a specified number of emission lines randomly generated
275         * with the same distribution as bin 64 histogram which simulated
276         * the smoothed emission line density distribution.
277         *
278         * @param lines a List of Line instances representing peaks in the real quasar emission line data.
279         * @param number the number of random lines to generate
280         * @return a list of quasar emission lines with the same distribution as the input list
281         */
282    
283        public static List<Line> getRandomLines(List<Line> lines, int number) {
284    
285            double min = 1200;   // this wavelength range eliminates 28 quasars after 9392
286            double max = 9392;
287            int binSize = 8192;  // should be named maxBinCount
288    
289            // smallBinSize must produces 1 A bin size for 1200-9392 range
290            int smallBinSize = 8192;  // should be named maxSmallBinCount
291            int histogram[] = new int[smallBinSize];
292            int cumulative[] = new int[smallBinSize];
293    
294            // produce 1 A bin size histogram
295            for (Line line : lines) {
296                double wavelength = line.getWavelength();
297                int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
298                if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
299            }
300    
301            // Compute cumulative sum of probability distribution
302            int cumulativeSum = 0;
303            for (int i = 0; i < smallBinSize; i++) {
304                double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
305                int sum = 0;
306                int j = 64;  // Choose bin size 64 histogram as probablity distribution
307                for (int k = 0; k < j; k++) {   // centered sum
308                    int offset = i + k - j/2;
309                    if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
310                }
311                cumulativeSum += sum;
312                cumulative[i] = cumulativeSum;
313            }
314    
315            // Normalize cumulative sum :
316    
317            List<Line> randomLines = new LinkedList<Line>();
318    
319            // Generate random numbers with identical line density distribution
320            for (int i = 0; i < number; i++) {
321                // 1. Generate a random integer between 0 and cumulativeSum
322                int randint = (int) (Math.random() * ((double) cumulativeSum));
323                //System.out.println("Random number = " + randint);
324                // 2. locate wavelength bin with equal value using binary search of cumulative distribution
325                int binElement = smallBinSize / 2;  // initial bin number guess
326                boolean located = false;     // flag indicating that we have found the bin
327                int interval = smallBinSize / 2;
328                while (!located) {
329                    interval = interval / 2;
330                    int sum = cumulative[binElement];
331    
332                    if (randint == sum || interval == 0) located = true;
333                    else if (randint < sum) binElement = binElement - interval;
334                    else if (randint > sum) binElement = binElement + interval;
335    
336                    //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum);
337                }
338                // convert bin number to wavelength
339                double wavelength = ((double)binElement)*(max-min) / ((double)smallBinSize) + min;
340                randomLines.add(new Line(null, wavelength));
341            }
342    
343    
344            //don't sort random line list because we might want to pick a subset
345            //Collections.sort(randomLines);
346            return randomLines;
347        }
348    
349    }
350