001    package util;
002    
003    import java.io.*;
004    import java.text.DecimalFormat;
005    import java.util.*;
006    import java.lang.Math.*;
007    
008    /**
009     * <p>Create a wavelength sorted list of quasar emission lines in observed
010     * reference frame from Hewitt and Burbidge 1993ApJS...87..451H .
011     * The program will extract the emission line from column 81-85 (in quasar
012     * restframe) then 'redshift' it back to the observed frame by multiplying
013     * it by 1 + z(em) where the emission redshift is from columns 68-73 (ends
014     * with first non-numeric char or whitespace).</p>
015     * By John Talbot, Sep 16, 2003
016     *
017     * <ul>
018     * <li>INPUT: table1.txt data file from 1993ApJS...87..451H, with page headers
019     *     removed and ZZZ as the last line</li>
020     * <li>OUTPUT:
021     *    <ol>
022     *    <li>list sorted by wavelength of observed frame emission lines with the
023     *        quasar in which it occurs</li>
024     *    <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and
025     *        128 A bin sizes.</li>
026     *    <li>Histogram for one bin size and half-bin size shifted count</li>
027     *    </ol>
028     * </li)
029     * </ul>
030     */
031    public class EmissionLines {
032        public static void main(String[] args) {
033            try {
034              List<Line> lines = getEmissionLines();
035              // optional tasks :
036              getStatistics(lines, "");
037              // Generate an equal number of random lines to verify distribution
038              List<Line> randomLines = getRandomLines(lines, lines.size() - 28);
039              getStatistics(randomLines, "random");
040              int numberOfSimulations = 1000;
041              getVarianceOfRandomLines(lines, numberOfSimulations);
042            } catch (Exception e) {
043                System.out.println("Exception" + e);
044                System.exit(0);
045            }
046        }
047    
048        public static void getVarianceOfRandomLines(List<Line> lines, int aNumberOfSimulations) throws java.io.IOException {
049            int smallBinSize = 8192;
050            int maximum[] = new int[smallBinSize];
051            int minimum[] = new int[smallBinSize];
052            int sum[]     = new int[smallBinSize];
053            int sum2[]    = new int[smallBinSize];
054            Arrays.fill(minimum, Integer.MAX_VALUE);   // set to upper bound for integers for collecting minima
055            double min = 1200;   // this wavelength range eliminates 28 quasars after 9392
056            double max = 9392;
057            for (int n = 0; n < aNumberOfSimulations; n++) {
058                List<Line> randomLines = getRandomLines(lines, lines.size() - 28);
059     
060                // produce 1 A bin size histogram of random lines
061                int histogram[] = new int[smallBinSize];
062                for (Line line : randomLines) {
063                    double wavelength = line.getWavelength();
064                    int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
065                    if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
066                }
067    /**    // simpler code which makes use of collections :
068                Collection<Integer> histogram = new LinkedList<Integer>();
069                int maximumElementValue = Integer,MAX_INT;
070                for(Line line : randomLines) { 
071                    int binElement = (int) (((double)smallBinSize)*(line.getWavelength()-min)/(max-min));
072                    maximumElementValue = max(maximumElementValue, binElement);
073                    histogram.add(binElement);
074                }
075                for (int i = 0; i < maximumElementValue; i++) System.out.println(Collections.frequency(histogram, i));
076    */
077                // compute bin 4 histogram of random lines and save min, max, sum and sum2            
078                int histogram4[] = new int[smallBinSize];
079                for (int i = 0; i < smallBinSize; i++) {
080                    int box = 0;
081                    int j = 4;  // Choose bin size 4
082                    for (int k = 0; k < j; k++) {   // centered sum
083                        int offset = i + k - j/2;
084                        if (offset >= 0 && offset < smallBinSize) box += histogram[offset];
085                    }
086                    histogram4[i] = box;
087                    sum[i] += box;
088                    sum2[i] += box*box;
089                    minimum[i] = Math.min(minimum[i], box);
090                    maximum[i] = Math.max(maximum[i], box);
091                }
092            }
093    
094    
095            // produce 1 A bin size histogram of observed lines
096            int realHistogram[] = new int[smallBinSize];
097            for (Line line : lines) {
098                double wavelength = line.getWavelength();
099                int binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
100                if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++;
101            }
102    
103            // compute bin 4 histogram of observed lines 
104            int realHistogram4[] = new int[smallBinSize];
105            for (int i = 0; i < smallBinSize; i++) {
106                int box = 0;
107                int j = 4;  // Choose bin size 4
108                for (int k = 0; k < j; k++) {   // centered sum
109                    int offset = i + k - j/2;
110                    if (offset >= 0 && offset < smallBinSize)
111                      box += realHistogram[offset];
112                }
113                realHistogram4[i] = box;
114            }
115    
116    
117            FileWriter writer = new FileWriter("data/variance" + aNumberOfSimulations + ".txt");
118            DecimalFormat zFormatter = new DecimalFormat("0");
119            double average[] = new double[smallBinSize];
120            double standarddev[]= new double[smallBinSize];
121            for (int i = 0; i < smallBinSize; i++) {
122                average[i] = (double) sum[i] / (double) aNumberOfSimulations;
123                double s  = (double) sum[i];
124                double s2 = (double) sum2[i];
125                double num = (double)aNumberOfSimulations;
126                
127                standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0));
128    
129                double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
130                double significance = 0.0;    // clip negative deviations to zero
131                double deviation = ((double) realHistogram4[i]) - average[i];
132                if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i];   
133                
134                writer.write(zFormatter.format(wavelength) + " " + 
135                             formatFloatCount(average[i]) + " " + 
136                             formatCount(minimum[i])  + " " + 
137                             formatCount(maximum[i])  + " " + 
138                             formatFloatCount(average[i] + standarddev[i])  + " " +                       
139                             formatFloatCount(average[i] + 2*standarddev[i])  + " " +
140                             formatFloatCount(average[i] + 3*standarddev[i])  + " " +                       
141                             formatFloatCount(average[i] + 4*standarddev[i])  + " " +
142                             formatFloatCount(average[i] + 5*standarddev[i])  + " " +
143                             formatCount(realHistogram4[i])  + " " +
144                             formatFloatCount(significance)  + " " +
145                             "\n");
146                
147            }
148            writer.close();
149        }
150    
151        /** Get a sorted list of quasar emission lines in the observed frame from
152         * Hewitt and Burbidge 1993ApJS...87..451H.
153         * Note: There is a many to one correspondence between a line and a quasar
154         * (i.e. several lines map to the same quasar)
155         *
156         * @return list of quasar emission lines
157         */
158        public static List<Line> getEmissionLines() throws java.io.IOException {
159            FileReader reader = new FileReader("data/table1noheaders.txt");
160            BufferedReader in = new BufferedReader(reader);
161            int rowNumber = 1;     // resets to one after each quasar
162            Quasar qso = new Quasar();
163            while (true) {
164                String row = in.readLine();
165                if (row.indexOf("ZZZ") > -1)
166                    break;  // last line
167                else if (row.length() == 0) {  // blank line ends each quasar
168                    qso = new Quasar();
169                    rowNumber = 0;
170                } else if (rowNumber == 1) {  // first row
171                    qso.addFirstRow(row);
172                } else if (rowNumber == 2) {  // second row
173                    qso.addSecondRow(row);
174                } else {
175                    qso.addMoreRows(row);
176                }
177                rowNumber++;
178            }
179            reader.close();
180    // warning: [unchecked] unchecked method invocation: <T>sort(java.util.List<T>) in java.util.Collections is applied to (java.util.List<util.Line>)
181            Collections.sort(Quasar.allLines);   
182            System.out.println(Quasar.count + " quasars read");
183            return Quasar.allLines;
184        }
185    
186        /** Get statistics from a list of quasar emission lines
187         *
188         * @param lines a List of Line instances representing the quasar emssion lines
189         */
190        public static void getStatistics(List<Line> lines, String fileName) throws java.io.IOException {
191            double min = 1200;   // this wavelength range eliminates 28 quasars after 9392
192            double max = 9392;
193            int binSize = 8192;  // should be named maxBinCount
194    
195            // smallBinSize must produces 1 A bin size for 1200-9392 range
196            int smallBinSize = 8192;  // should be named maxSmallBinCount
197            double halfBinSize = (max - min) / ((double) (2*binSize));
198            int histogram[] = new int[smallBinSize];
199            int histogram1[] = new int[binSize];
200            int histogram2[] = new int[binSize];
201    
202            FileWriter writer = new FileWriter("data/" + fileName + "emissionlines.txt");
203            for (Line line : lines) {
204                Star quasar =  line.getSource();
205                writer.write(line.toString());
206                if (quasar != null) writer.write(" " + quasar);
207                writer.write("\n");
208    
209                double wavelength = line.getWavelength();
210                int binElement = (int) (((double)binSize)*(wavelength  -min)/(max - min));
211                if (binElement >= 0 && binElement < binSize) histogram1[binElement]++;
212    
213                // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins)
214                binElement = (int) (((double)binSize)*(wavelength-min-halfBinSize)/(max-min));
215                if (binElement >= 0 && binElement < binSize) histogram2[binElement]++;
216    
217                // produce 1 A histogram
218                binElement = (int) (((double)smallBinSize)*(wavelength-min)/(max-min));
219                if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
220            }
221            writer.close();
222    
223            // Output a single histogram for the specified bin size
224            DecimalFormat zFormatter = new DecimalFormat("0");
225            writer = new FileWriter("data/" + fileName + "histogram" + binSize + ".txt");
226            for (int i = 0; i < binSize; i++) {
227                double wavelength = ((double)i) * (max - min) / ((double)binSize) + min;
228                writer.write(zFormatter.format(wavelength) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n");
229            }
230            writer.close();
231    
232            // Output multiple histograms starting with bin size 1 and increasing in powers of 2
233    
234            writer = new FileWriter("data/" + fileName + "histogram.txt");
235            for (int i = 0; i < smallBinSize; i++) {
236                double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
237                writer.write(zFormatter.format(wavelength) + " ");
238                for (int j = 1; j < 256; j = j*2) {
239                    int sum = 0;
240                    // centered sum
241                    for (int k = 0; k < j; k++) {
242                        int offset = i + k - j/2;
243                        if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
244                    }
245                    writer.write(formatCount(sum) + " ");
246                    // TODO: append at end of line a code for each histogram
247                    // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth)
248                }
249                writer.write("\n");
250            }
251            writer.close();
252        }
253    
254        // TODO: Fix this to allow histogram counts greater than 999
255        public static String formatCount(int aCount) {
256            String count = Integer.toString(aCount);
257            switch (count.length()) {
258                case 1: count = " " + count;
259                case 2: count = " " + count;
260                case 3:
261            }
262            return count;
263        }
264    
265        static final DecimalFormat fFormatter = new DecimalFormat("0.00");
266        public static String formatFloatCount(double aCount) {
267            String count = fFormatter.format(aCount);
268            switch (count.length()) {
269                case 4: count = " " + count;
270            }
271            return count;
272        }
273    
274        /** Monte-Carlo generation of a specified number of emission lines randomly generated
275         * with the same distribution as bin 64 histogram which simulated
276         * the smoothed emission line density distribution.
277         *
278         * @param lines a List of Line instances representing peaks in the real quasar emission line data.
279         * @param number the number of random lines to generate
280         * @return a list of quasar emission lines with the same distribution as the input list
281         */
282    
283        public static List<Line> getRandomLines(List<Line> lines, int number) {
284    
285            double min = 1200;   // this wavelength range eliminates 28 quasars after 9392
286            double max = 9392;
287            int binSize = 8192;  // should be named maxBinCount
288    
289            // smallBinSize must produces 1 A bin size for 1200-9392 range
290            int smallBinSize = 8192;  // should be named maxSmallBinCount
291            int histogram[] = new int[smallBinSize];
292            int cumulative[] = new int[smallBinSize];
293    
294            // produce 1 A bin size histogram
295            for (Line line : lines) {
296                double wavelength = line.getWavelength();
297                int binElement = (int) (((double)smallBinSize) * (wavelength - min)/(max-min));
298                if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
299            }
300    
301            // Compute cumulative sum of probability distribution
302            int cumulativeSum = 0;
303            for (int i = 0; i < smallBinSize; i++) {
304                double wavelength = ((double)i)*(max-min) / ((double)smallBinSize) + min;
305                int sum = 0;
306                int j = 64;  // Choose bin size 64 histogram as probablity distribution
307                for (int k = 0; k < j; k++) {   // centered sum
308                    int offset = i + k - j/2;
309                    if (offset >= 0 && offset < smallBinSize)
310                      sum += histogram[offset];
311                }
312                cumulativeSum += sum;
313                cumulative[i] = cumulativeSum;
314            }
315    
316            // Normalize cumulative sum :
317    
318            List<Line> randomLines = new LinkedList<Line>();
319    
320            // Generate random numbers with identical line density distribution
321            for (int i = 0; i < number; i++) {
322                // 1. Generate a random integer between 0 and cumulativeSum
323                int randint = (int) (Math.random() * ((double) cumulativeSum));
324                //System.out.println("Random number = " + randint);
325                // 2. locate wavelength bin with equal value using binary search of cumulative distribution
326                int binElement = smallBinSize / 2;  // initial bin number guess
327                boolean located = false;     // flag indicating that we have found the bin
328                int interval = smallBinSize / 2;
329                while (!located) {
330                    interval = interval / 2;
331                    int sum = cumulative[binElement];
332    
333                    if (randint == sum || interval == 0) located = true;
334                    else if (randint < sum) binElement = binElement - interval;
335                    else if (randint > sum) binElement = binElement + interval;
336    
337                    //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum);
338                }
339                // convert bin number to wavelength
340                double wavelength = ((double)binElement)*(max-min) / ((double)smallBinSize) + min;
341                randomLines.add(new Line(null, wavelength));
342            }
343    
344    
345            //don't sort random line list because we might want to pick a subset
346            //Collections.sort(randomLines);
347            return randomLines;
348        }
349    
350    }
351