001 package util.redshift;
002
003 import java.io.*;
004 import java.text.DecimalFormat;
005 import java.util.*;
006
007 /**
008 * <p>Create a redshift sorted list of quasars from Veron (2003) .
009 * The program will extract the emission emission line redshift
010 * column 71- 75.</p>
011 * By John Talbot, Oct 4, 2003
012 *
013 * <ul>
014 * <li>INPUT: table1.txt data file from 2003ApJS...87..451H</li>
015 * <li>OUTPUT:
016 * <ol>
017 * <li>list of quasars sorted by redshifts</li>
018 * <li>Centered running average histogram with 1, 2, 4, 8, 16, 32, 64 and
019 * 128 A bin sizes.</li>
020 * <li>Histogram for one bin size and half-bin size shifted count</li>
021 * </ol>
022 * </li)
023 * </ul>
024 */
025 public class Redshifts {
026 public static final double min = 0.0; // wavelength range with 512 data points at 0.01 z step
027 public static final double max = 5.12; // this upper bound for redshift excludes only 12 quasars
028 public static final int cutoff = 12; // number of quasars which fall outside the min to max redshift range
029 public static final DecimalFormat zFormatter = new DecimalFormat("0.00000"); // redshift precision
030
031 public static void main(String[] args) {
032 try {
033 List<Quasar> quasars = Quasar.readQuasars();
034 // optional tasks :
035 getStatistics(quasars, "z");
036 // Generate an equal number of random lines to verify distribution
037 List<Quasar> randomQuasars = getRandomQuasars(quasars, quasars.size() - cutoff);
038 getStatistics(randomQuasars, "randomz");
039 int numberOfSimulations = 1000;
040 getVarianceOfRandomQuasars(randomQuasars, numberOfSimulations);
041 } catch (Exception e) {
042 System.out.println("Exception" + e);
043 System.exit(0);
044 }
045 }
046
047 public static void getVarianceOfRandomQuasars(List<Quasar> quasars, int aNumberOfSimulations) throws java.io.IOException {
048 int smallBinSize = 8192;
049 int maximum[] = new int[smallBinSize];
050 int minimum[] = new int[smallBinSize];
051 int sum[] = new int[smallBinSize];
052 int sum2[] = new int[smallBinSize];
053 java.util.Arrays.fill(minimum, Integer.MAX_VALUE); // set to upper bound for integers for collecting minima
054 for (int n = 0; n < aNumberOfSimulations; n++) {
055 List<Quasar> randomQuasars = getRandomQuasars(quasars, quasars.size() - cutoff);
056
057 // produce dz = (5.12/8192) = 0.000625 bin size histogram of random lines
058 int histogram[] = new int[smallBinSize];
059 for (Quasar qso : randomQuasars) {
060 double z = qso.getZem();
061 int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
062 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
063 }
064
065 // compute bin dz = 8 * 0.000625 = .005 histogram of random lines and save min, max, sum and sum2
066 int histogram8[] = new int[smallBinSize];
067 for (int i = 0; i < smallBinSize; i++) {
068 int box = 0;
069 int j = 8; // Choose bin size 8 * 0.000625 = .005
070 for (int k = 0; k < j; k++) { // centered sum
071 int offset = i + k - j/2;
072 if (offset >= 0 && offset < smallBinSize) box += histogram[offset];
073 }
074 histogram8[i] = box;
075 sum[i] += box;
076 sum2[i] += box*box;
077 if (box < minimum[i]) minimum[i] = box;
078 if (box > maximum[i]) maximum[i] = box;
079 }
080 }
081
082 // produce dz = (5.12/8192) = 0.000625 bin size histogram of observed redshifts
083 int realHistogram[] = new int[smallBinSize];
084 for (Quasar qso : quasars) {
085 double z = qso.getZem();
086 int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
087 if (binElement >= 0 && binElement < smallBinSize) realHistogram[binElement]++;
088 }
089
090 // compute bin dz = 8 * 0.000625 = .005 histogram of observed redshifts
091 int realHistogram8[] = new int[smallBinSize];
092 for (int i = 0; i < smallBinSize; i++) {
093 int box = 0;
094 int j = 8; // Choose bin size 8 * 0.000625 = .005
095 for (int k = 0; k < j; k++) { // centered sum
096 int offset = i + k - j/2;
097 if (offset >= 0 && offset < smallBinSize) box += realHistogram[offset];
098 }
099 realHistogram8[i] = box;
100 }
101
102
103 FileWriter writer = new FileWriter("data/zvariance" + aNumberOfSimulations + ".txt");
104 double average[] = new double[smallBinSize];
105 double standarddev[]= new double[smallBinSize];
106 for (int i = 0; i < smallBinSize; i++) {
107 average[i] = (double) sum[i] / (double) aNumberOfSimulations;
108 double s = (double) sum[i];
109 double s2 = (double) sum2[i];
110 double num = (double)aNumberOfSimulations;
111
112 standarddev[i]= Math.sqrt((s2-s*s/num)/(num-1.0));
113
114 double z = ((double)i)*(max-min) / ((double)smallBinSize) + min;
115 double significance = 0.0; // clip negative deviations to zero
116 double deviation = ((double) realHistogram8[i]) - average[i];
117 if (deviation > 0.0 && standarddev[i] != 0.0) significance = deviation/standarddev[i];
118
119 writer.write(zFormatter.format(z) + " " +
120 formatFloatCount(average[i]) + " " +
121 formatCount(minimum[i]) + " " +
122 formatCount(maximum[i]) + " " +
123 formatFloatCount(average[i] + standarddev[i]) + " " +
124 formatFloatCount(average[i] + 2*standarddev[i]) + " " +
125 formatFloatCount(average[i] + 3*standarddev[i]) + " " +
126 formatFloatCount(average[i] + 4*standarddev[i]) + " " +
127 formatFloatCount(average[i] + 5*standarddev[i]) + " " +
128 formatCount(realHistogram8[i]) + " " +
129 formatFloatCount(significance) + " " +
130 "\n");
131
132 }
133 writer.close();
134 }
135
136 /** Get statistics from a list of quasar redshifts
137 *
138 * @param quasars quasars instances (each having a redshift property)
139 * @param fileName the filename
140 */
141 public static void getStatistics(List<Quasar> quasars, String fileName) throws java.io.IOException {
142 int binSize = 8192; // should be named maxBinCount
143
144 int smallBinSize = 8192; // should be named maxSmallBinCount
145 double halfBinSize = (max - min) / ((double) (2 * binSize));
146 int histogram[] = new int[smallBinSize];
147 int histogram1[] = new int[binSize];
148 int histogram2[] = new int[binSize];
149
150 for (Quasar qso : quasars) {
151 double z = qso.getZem();
152 int binElement = (int) (((double)binSize)*(z-min)/(max-min));
153 if (binElement >= 0 && binElement < binSize) histogram1[binElement]++;
154
155 // shift histogram by 1/2 a bin size (in case a narrow group is split across two bins)
156 binElement = (int) (((double)binSize)*(z-min-halfBinSize)/(max-min));
157 if (binElement >= 0 && binElement < binSize) histogram2[binElement]++;
158
159 // produce 1 A histogram
160 binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
161 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
162 }
163
164 // Output a single histogram for the specified bin size
165
166 FileWriter writer = new FileWriter("data/" + fileName + "histogram" + binSize + ".txt");
167 for (int i = 0; i < binSize; i++) {
168 double z = ((double)i)*(max-min) / ((double)binSize) + min;
169 writer.write(zFormatter.format(z) + " " + formatCount(histogram1[i]) + " " + formatCount(histogram2[i]) + "\n");
170 }
171 writer.close();
172
173 // Output multiple histograms starting with bin size 5.12/8192 and increasing in powers of 2
174
175 writer = new FileWriter("data/" + fileName + "histogram.txt");
176 for (int i = 0; i < smallBinSize; i++) {
177 double z = ((double)i)*(max-min) / ((double)smallBinSize) + min;
178 writer.write(zFormatter.format(z) + " ");
179 for (int j = 1; j < 256; j = j*2) {
180 int sum = 0;
181 // centered sum
182 for (int k = 0; k < j; k++) {
183 int offset = i + k - j/2;
184 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
185 }
186 writer.write(formatCount(sum) + " ");
187 // TODO: append at end of line a code for each histogram
188 // which exceeds a moving average defined threshold (perhaps bin 64 which is smooth)
189 }
190 writer.write("\n");
191 }
192 writer.close();
193 }
194
195 // TODO: Fix this to allow histogram counts greater than 999
196 public static String formatCount(int aCount) {
197 String count = Integer.toString(aCount);
198 switch (count.length()) {
199 case 1: count = " " + count; break;
200 case 2: count = " " + count; break;
201 case 3: count = " " + count; break;
202 case 4: count = " " + count; break;
203 case 5:
204 }
205 return count;
206 }
207
208 static final DecimalFormat fFormatter = new DecimalFormat("0.00");
209 public static String formatFloatCount(double aCount) {
210 String count = fFormatter.format(aCount);
211 switch (count.length()) {
212 case 4: count = " " + count;
213 }
214 return count;
215 }
216
217 /** Monte-Carlo generation of a specified number of redshift randomly generated
218 * with the same distribution as bin delta z = 0.08 histogram which simulated
219 * the smoothed redshift distribution.
220 *
221 * @param quasars a List of Quasar instances containing actual redshift value.
222 * @param number the number of random redshifts to generate
223 * @return a list of quasar instances with redshifts having the same distribution as the input list
224 */
225 public static List<Quasar> getRandomQuasars(List<Quasar> quasars, int number) {
226
227 int binSize = 8192; // should be named maxBinCount
228
229 // smallBinSize produces dz = 5.12/8192 = 0.000625 bin size for z = 0.00 - 5.12 range
230 int smallBinSize = 8192; // should be named maxSmallBinCount
231 int histogram[] = new int[smallBinSize];
232 int cumulative[] = new int[smallBinSize];
233
234 // produce 1 A bin size histogram
235 for (Iterator x = quasars.iterator(); x.hasNext(); ) {
236 Quasar qso = (Quasar) x.next();
237 double z = qso.getZem();
238 int binElement = (int) (((double)smallBinSize)*(z-min)/(max-min));
239 if (binElement >= 0 && binElement < smallBinSize) histogram[binElement]++;
240 }
241
242 // Compute cumulative sum of probability distribution
243 int cumulativeSum = 0;
244 for (int i = 0; i < smallBinSize; i++) {
245 double z = ((double)i)*(max-min) / ((double)smallBinSize) + min;
246 int sum = 0;
247 int j = 128; // Choose bin size dz = 128 * 5.12/8192 = .08 histogram as probablity distribution
248 for (int k = 0; k < j; k++) { // centered sum
249 int offset = i + k - j/2;
250 if (offset >= 0 && offset < smallBinSize) sum += histogram[offset];
251 }
252 cumulativeSum += sum;
253 cumulative[i] = cumulativeSum;
254 }
255
256 // Normalize cumulative sum :
257
258 List<Quasar> randomQuasars = new LinkedList<Quasar>();
259
260 // Generate random numbers with identical line density distribution
261 for (int i = 0; i < number; i++) {
262 // 1. Generate a random integer between 0 and cumulativeSum
263 int randint = (int) (Math.random() * ((double) cumulativeSum));
264 //System.out.println("Random number = " + randint);
265 // 2. locate wavelength bin with equal value using binary search of cumulative distribution
266 int binElement = smallBinSize / 2; // initial bin number guess
267 boolean located = false; // flag indicating that we have found the bin
268 int interval = smallBinSize / 2;
269 while (!located) {
270 interval = interval / 2;
271 int sum = cumulative[binElement];
272
273 if (randint == sum || interval == 0) located = true;
274 else if (randint < sum) binElement = binElement - interval;
275 else if (randint > sum) binElement = binElement + interval;
276
277 //System.out.println("Guess : " + interval + " bin = " + binElement + " sum = " + sum);
278 }
279 // convert bin number to z
280 double z = ((double)binElement) * (max - min) / ((double)smallBinSize) + min;
281 Quasar qso = new Quasar(z);
282 randomQuasars.add(qso);
283 }
284
285
286 //don't sort random redshift list because we might want to pick a subset
287 //Collections.sort(randomLines);
288 return randomQuasars;
289 }
290
291 }
292