001    package util.redshift;
002    
003    import java.io.*;
004    import java.text.DecimalFormat;
005    import java.util.*;
006    
007    /**
008     * <p>Create a redshift sorted list of quasars from Veron (2003) .
009     * The program will extract the emission emission line redshift
010     * column 71- 75.</p>
011     * By John Talbot, Oct 4, 2003
012     *
013     * <ul>
014     * <li>INPUT: table1.txt data file from 2003ApJS...87..451H</li>
015     * <li>OUTPUT:
016     *    <ol>
017     *    <li>list of quasars sorted by redshifts</li>
018     *    <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and
019     *        128 A bin sizes.</li>
020     *    <li>Histogram for one bin size and half-bin size shifted count</li>
021     *    </ol>
022     * </li)
023     * </ul>
024     */
025    public class Redshifts {
026        public static final double min = 0.0;  // wavelength range with 512 data points at 0.01 z step
027        public static final double max = 5.12; // this upper bound for redshift excludes only 12 quasars
028        public static final int cutoff = 12;   // number of quasars which fall outside the min to max redshift range
029        public static final DecimalFormat zFormatter = new DecimalFormat("0.00000");  // redshift precision
030        
031        public static void main(String[] args) {
032            try {
033              List<Quasar> quasars = Quasar.readQuasars();
034              // optional tasks :
035              getStatistics(quasars, "z");
036              // Generate an equal number of random lines to verify distribution
037              List<Quasar> randomQuasars = getRandomQuasars(quasars, quasars.size() - cutoff);
038              getStatistics(randomQuasars, "randomz");
039              int numberOfSimulations = 1000;
040              getVarianceOfRandomQuasars(randomQuasars, numberOfSimulations);
041            } catch (Exception e) {
042                System.out.println("Exception" + e);
043                System.exit(0);
044            }
045        }
046    
047        public static void getVarianceOfRandomQuasars(List<Quasar> quasars, int aNumberOfSimulations) throws java.io.IOException {
048            int smallBinSize = 8192;
049            int maximum[] = new int[smallBinSize];
050            int minimum[] = new int[smallBinSize];
051            int sum[]     = new int[smallBinSize];
052            int sum2[]    = new int[smallBinSize];
053            java.util.Arrays.fill(minimum, Integer.MAX_VALUE); // set to upper bound for integers for collecting minima
054            for (int n = 0; n < aNumberOfSimulations; n++) {
055                List<Quasar> randomQuasars = getRandomQuasars(quasars, quasars.size() - cutoff);
056     
057                // produce dz = (5.12/8192) = 0.000625 bin size histogram of random lines
058                int histogram[] = new int[smallBinSize];
059                for (Quasar qso : randomQuasars) {
060                    double z = qso.getZem();
061                    int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
062                    if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
063                }
064    
065                // compute bin dz = 8 * 0.000625 = .005 histogram of random lines and save min, max, sum and sum2            
066                int histogram8[] = new int[smallBinSize];
067                for (int i = 0; i < smallBinSize; i++) {
068                    int box = 0;
069                    int j = 8;  // Choose bin size 8 * 0.000625 = .005
070                    for (int k = 0; k < j; k++) {   // centered sum
071                        int offset = i + k - j/2;
072                        if (offset >= 0 && offset < smallBinSize) box += histogram[offset];
073                    }
074                    histogram8[i] = box;
075                    sum[i] += box;
076                    sum2[i] += box*box;
077                    if (box < minimum[i]) minimum[i] = box;
078                    if (box > maximum[i]) maximum[i] = box;
079                }
080            }
081    
082            // produce dz = (5.12/8192) = 0.000625 bin size histogram of observed redshifts
083            int realHistogram[] = new int[smallBinSize];
084            for (Quasar qso : quasars) {
085                double z = qso.getZem();
086                int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
087                if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++;
088            }
089    
090            // compute bin dz = 8 * 0.000625 = .005 histogram of observed redshifts
091            int realHistogram8[] = new int[smallBinSize];
092            for (int i = 0; i < smallBinSize; i++) {
093                int box = 0;
094                int j = 8;  // Choose bin size 8 * 0.000625 = .005
095                for (int k = 0; k < j; k++) {   // centered sum
096                    int offset = i + k - j/2;
097                    if (offset >= 0 && offset < smallBinSize) box += realHistogram[offset];
098                }
099                realHistogram8[i] = box;
100            }
101    
102    
103            FileWriter writer = new FileWriter("data/zvariance" + aNumberOfSimulations + ".txt");
104            double average[] = new double[smallBinSize];
105            double standarddev[]= new double[smallBinSize];
106            for (int i = 0; i < smallBinSize; i++) {
107                average[i] = (double) sum[i] / (double) aNumberOfSimulations;
108                double s  = (double) sum[i];
109                double s2 = (double) sum2[i];
110                double num = (double)aNumberOfSimulations;
111                
112                standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0));
113    
114                double z = ((double)i)*(max-min) / ((double)smallBinSize) + min;
115                double significance = 0.0;    // clip negative deviations to zero
116                double deviation = ((double) realHistogram8[i]) - average[i];
117                if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i];   
118                
119                writer.write(zFormatter.format(z) + " " + 
120                             formatFloatCount(average[i]) + " " + 
121                             formatCount(minimum[i])  + " " + 
122                             formatCount(maximum[i])  + " " + 
123                             formatFloatCount(average[i] + standarddev[i])  + " " +                       
124                             formatFloatCount(average[i] + 2*standarddev[i])  + " " +
125                             formatFloatCount(average[i] + 3*standarddev[i])  + " " +                       
126                             formatFloatCount(average[i] + 4*standarddev[i])  + " " +
127                             formatFloatCount(average[i] + 5*standarddev[i])  + " " +
128                             formatCount(realHistogram8[i])  + " " +
129                             formatFloatCount(significance)  + " " +
130                             "\n");
131                
132            }
133            writer.close();
134        }
135    
136        /** Get statistics from a list of quasar redshifts
137         *
138         * @param quasars quasars instances (each having a redshift property)
139         * @param fileName the filename
140         */
141        public static void getStatistics(List<Quasar> quasars, String fileName) throws java.io.IOException {
142            int binSize = 8192;  // should be named maxBinCount
143    
144            int smallBinSize = 8192;  // should be named maxSmallBinCount
145            double halfBinSize = (max - min) / ((double) (2 * binSize));
146            int histogram[] = new int[smallBinSize];
147            int histogram1[] = new int[binSize];
148            int histogram2[] = new int[binSize];
149    
150            for (Quasar qso : quasars) {
151                double z = qso.getZem();
152                int binElement = (int) (((double)binSize)*(z-min)/(max-min));
153                if (binElement >= 0 && binElement < binSize) histogram1[binElement]++;
154    
155                // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins)
156                binElement = (int) (((double)binSize)*(z-min-halfBinSize)/(max-min));
157                if (binElement >= 0 && binElement < binSize) histogram2[binElement]++;
158    
159                // produce 1 A histogram
160                binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
161                if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
162            }
163    
164            // Output a single histogram for the specified bin size
165            
166            FileWriter writer = new FileWriter("data/" + fileName + "histogram" + binSize + ".txt");
167            for (int i = 0; i < binSize; i++) {
168                double z = ((double)i)*(max-min) / ((double)binSize) + min;
169                writer.write(zFormatter.format(z) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n");
170            }
171            writer.close();
172    
173            // Output multiple histograms starting with bin size 5.12/8192 and increasing in powers of 2
174    
175            writer = new FileWriter("data/" + fileName + "histogram.txt");
176            for (int i = 0; i < smallBinSize; i++) {
177                double z = ((double)i)*(max-min) / ((double)smallBinSize) + min;
178                writer.write(zFormatter.format(z) + " ");
179                for (int j = 1; j < 256; j = j*2) {
180                    int sum = 0;
181                    // centered sum
182                    for (int k = 0; k < j; k++) {
183                        int offset = i + k - j/2;
184                        if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
185                    }
186                    writer.write(formatCount(sum) + " ");
187                    // TODO: append at end of line a code for each histogram
188                    // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth)
189                }
190                writer.write("\n");
191            }
192            writer.close();
193        }
194    
195        // TODO: Fix this to allow histogram counts greater than 999
196        public static String formatCount(int aCount) {
197            String count = Integer.toString(aCount);
198            switch (count.length()) {
199                case 1: count = "    " + count; break;
200                case 2: count = "   " + count; break;
201                case 3: count = "  " + count; break;
202                case 4: count = " " + count; break;
203                case 5:
204            }
205            return count;
206        }
207    
208        static final DecimalFormat fFormatter = new DecimalFormat("0.00");
209        public static String formatFloatCount(double aCount) {
210            String count = fFormatter.format(aCount);
211            switch (count.length()) {
212                case 4: count = " " + count;
213            }
214            return count;
215        }
216    
217        /** Monte-Carlo generation of a specified number of redshift randomly generated
218         * with the same distribution as bin delta z = 0.08 histogram which simulated
219         * the smoothed redshift distribution.
220         *
221         * @param quasars a List of Quasar instances containing actual redshift value.
222         * @param number the number of random redshifts to generate
223         * @return a list of quasar instances with redshifts having the same distribution as the input list
224         */
225        public static List<Quasar> getRandomQuasars(List<Quasar> quasars, int number) {
226    
227            int binSize = 8192;  // should be named maxBinCount
228    
229            // smallBinSize produces dz = 5.12/8192 = 0.000625 bin size for z = 0.00 - 5.12 range
230            int smallBinSize = 8192;  // should be named maxSmallBinCount
231            int histogram[] = new int[smallBinSize];
232            int cumulative[] = new int[smallBinSize];
233    
234            // produce 1 A bin size histogram
235            for (Iterator x = quasars.iterator(); x.hasNext(); ) {
236                Quasar qso = (Quasar) x.next();
237                double z = qso.getZem();
238                int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
239                if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
240            }
241    
242            // Compute cumulative sum of probability distribution
243            int cumulativeSum = 0;
244            for (int i = 0; i < smallBinSize; i++) {
245                double z = ((double)i)*(max-min) / ((double)smallBinSize) + min;
246                int sum = 0;
247                int j = 128;  // Choose bin size dz = 128 * 5.12/8192 = .08 histogram as probablity distribution
248                for (int k = 0; k < j; k++) {   // centered sum
249                    int offset = i + k - j/2;
250                    if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
251                }
252                cumulativeSum += sum;
253                cumulative[i] = cumulativeSum;
254            }
255    
256            // Normalize cumulative sum :
257    
258            List<Quasar> randomQuasars = new LinkedList<Quasar>();
259    
260            // Generate random numbers with identical line density distribution
261            for (int i = 0; i < number; i++) {
262                // 1. Generate a random integer between 0 and cumulativeSum
263                int randint = (int) (Math.random() * ((double) cumulativeSum));
264                //System.out.println("Random number = " + randint);
265                // 2. locate wavelength bin with equal value using binary search of cumulative distribution
266                int binElement = smallBinSize / 2;  // initial bin number guess
267                boolean located = false;     // flag indicating that we have found the bin
268                int interval = smallBinSize / 2;
269                while (!located) {
270                    interval = interval / 2;
271                    int sum = cumulative[binElement];
272    
273                    if (randint == sum || interval == 0) located = true;
274                    else if (randint < sum) binElement = binElement - interval;
275                    else if (randint > sum) binElement = binElement + interval;
276    
277                    //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum);
278                }
279                // convert bin number to z
280                double z = ((double)binElement) * (max - min) / ((double)smallBinSize) + min;
281                Quasar qso = new Quasar(z);
282                randomQuasars.add(qso);
283            }
284    
285    
286            //don't sort random redshift list because we might want to pick a subset
287            //Collections.sort(randomLines);
288            return randomQuasars;
289        }
290    
291    }
292