8 years ago · 46c21951c2
--- a/bi.bib
+++ b/bi.bib
@@ -0,0 +1,55 @@
 
				+@book{Knuth,
			
 
				+ author = {Knuth, Donald E.},
			
 
				+ title = {The Art of Computer Programming, Volume 2 (3rd Ed.): Seminumerical Algorithms},
			
 
				+ pages = {142},
			
 
				+ year = {1997},
			
 
				+ isbn = {0-201-89684-2},
			
 
				+ publisher = {Addison-Wesley Longman Publishing Co., Inc.},
			
 
				+ address = {Boston, MA, USA},
			
 
				+} 
			
 
				+ 
			
 
				+@misc{Magnus,
			
 
				+ author = {Wahlstr{\"o}m, Magnus},
			
 
				+ title = {discrcalc.tar.gz},
			
 
				+ url = {http://people.mpi-inf.mpg.de/~wahl/},
			
 
				+ keywords = {discrepancy, supersampling},
			
 
				+} 
			
 
				+
			
 
				+@article{Dobkin,
			
 
				+ author = {Dobkin, David P. and Eppstein, David and Mitchell, Don P.},
			
 
				+ title = {Computing the Discrepancy with Applications to Supersampling Patterns},
			
 
				+ journal = {ACM Transactions on Graphics},
			
 
				+ issue_date = {Oct. 1996},
			
 
				+ volume = {15},
			
 
				+ number = {4},
			
 
				+ month = oct,
			
 
				+ year = {1996},
			
 
				+ issn = {0730-0301},
			
 
				+ pages = {354--376},
			
 
				+ numpages = {23},
			
 
				+ url = {http://doi.acm.org/10.1145/234535.234536},
			
 
				+ doi = {10.1145/234535.234536},
			
 
				+ acmid = {234536},
			
 
				+ publisher = {ACM},
			
 
				+ address = {New York, NY, USA},
			
 
				+ keywords = {discrepancy, supersampling},
			
 
				+} 
			
 
				+
			
 
				+@inproceedings{Doerr,
			
 
				+ author = {Doerr, Carola and De Rainville, Fran{\c{c}}ois-Michel},
			
 
				+ title = {Constructing Low Star Discrepancy Point Sets with Genetic Algorithms},
			
 
				+ booktitle = {Proceedings of the 15th Annual Conference on Genetic and Evolutionary Computation},
			
 
				+ series = {GECCO '13},
			
 
				+ year = {2013},
			
 
				+ isbn = {978-1-4503-1963-8},
			
 
				+ location = {Amsterdam, The Netherlands},
			
 
				+ pages = {789--796},
			
 
				+ numpages = {8},
			
 
				+ url = {http://doi.acm.org/10.1145/2463372.2463469},
			
 
				+ doi = {10.1145/2463372.2463469},
			
 
				+ acmid = {2463469},
			
 
				+ publisher = {ACM},
			
 
				+ address = {New York, NY, USA},
			
 
				+ keywords = {algorithm engineering, genetic algorithms, geometric discrepancy, information-based complexity, monte-carlo methods, search heuristics},
			
 
				+} 
			
 
				+
			
--- a/main.tex
+++ b/main.tex
@@ -56,7 +56,8 @@ Experiments were conducted on two machines:
 
				 On these machines, some basic profiling has make clear that 
			
 
				 the main bottleneck of the computations is hiding in the \emph{computation
			
 
				 of the discrepancy}. The chosen algorithm and implantation of this 
			
 
				-cost function is the DEM-algorithm of \emph{Magnus Wahlstr\o m}.\medskip
			
 
				+cost function is the DEM-algorithm~\cite{Dobkin} of 
			
 
				+\emph{Magnus Wahlstr\"om}~\cite{Magnus}.\medskip
			
 
				 
			
 
				 All the experiments has been conducted on dimension 2,3,4 
			
 
				 --- with a fixed Halton basis 7, 13, 29, 3 ---. Some minor tests have
			
@@ -90,6 +91,10 @@ the implemented heuristics.
 
				 \end{mdframed}
			
 
				 \end{figure}
			
 
				 
			
 
				+Graphs are presented not with the usual ``box-and-whisker'' plots to show the 
			
 
				+error bounds, but in a more graphical way with error bands. The graph
			
 
				+of the mean result is included inside a band of the same color which
			
 
				+represents the uncertainty with regard to the values obtained.
			
 
				 
			
 
				 \section{Heuristics developed}
			
 
				 
			
@@ -117,7 +122,8 @@ the implemented heuristics.
 
				 The Fisher–Yates shuffle is an algorithm for generating a random permutation 
			
 
				 of a finite sets. The Fisher–Yates shuffle is unbiased, so that every 
			
 
				 permutation is equally likely. We present here the Durstenfeld variant of 
			
 
				-the algorithm, presented by Knuth in \emph{The Art of Computer programming}.
			
 
				+the algorithm, presented by Knuth in \emph{The Art of Computer Programming}
			
 
				+vol. 2~\cite{Knuth}.
			
 
				 The algorithm's time complexity is here $O(n)$, compared to $O(n^2)$ of 
			
 
				 the naive implementation.
			
 
				 
			
@@ -157,10 +163,18 @@ the naive implementation.
 
				 \subsubsection{Results and stability}
			
 
				 We first want to analyze the dependence of the results on the number of 
			
 
				 iterations of the heuristic, in order to discuss its stability. 
			
 
				-The results are compiled in the figures~\ref{rand_iter2}, \ref{rand_iter3}.
			
 
				+The results are compiled in the figures~\ref{rand_iter2},~\ref{rand_iter3},
			
 
				+restricted to a number of points between 80 and 180.
			
 
				+We emphasize that a lot of data appears on the graphs, 
			
 
				+and the error-band representation makes them a bit messy. These graphs
			
 
				+were made for extensive internal experiments and parameter searches.
			
 
				+The final wrap-up graphs are much lighter and only present the best 
			
 
				+results obtained.
			
 
				 As expected from a fully random search, the error bands are very large for 
			
 
				-low number of iterations ($15\%$ of the value for 200 iterations) and tends
			
 
				+low numbers of iterations ($15\%$ of the value for 400 iterations) and tend
			
 
				 to shrink with a bigger number of iterations (around $5\%$ for 1600 iterations).
			
 
				+This shrinkage is a direct consequence of well-known concentration bounds
			
 
				+(Chernoff and Azuma--Hoeffding).
			
 
				 The average results are quite stable, they decrease progressively with 
			
 
				 the growing number of iterations, but seems to get to a limits after 1000 
			
 
				 iterations. This value acts as a threshold for the interesting number of iterations.
			
@@ -180,8 +194,8 @@ discrepancy and this heuristic.
 
				 \end{figure}
			
 
				 
			
 
				 \subsection{Evolutionary heuristic: Simulated annealing and local search}
			
 
				-The second heuristic implemented is a randomiezd local search with 
			
 
				-simmulated annealing. This heuristic is inspired by the physical 
			
 
				+The second heuristic implemented is a randomized local search with 
			
 
				+simulated annealing. This heuristic is inspired by the physical 
			
 
				 process of annealing in metallurgy.
			
 
				 Simulated annealing interprets the physical slow cooling as a 
			
 
				 slow decrease in the probability of accepting worse solutions as it 
			
@@ -189,9 +203,9 @@ explores the solution space.
 
				 More precisely the neighbours are here the permutations which can be obtained
			
 
				 by application of exactly one transposition of the current permutation.
			
 
				 The selection phase is dependant on the current temperature:
			
 
				-after applaying a random transposition on the current permutation, either
			
 
				-the discrepency of the corresponding Halton set is decreased and the 
			
 
				-evolution is keeped, either it does not but is still keeped with 
			
 
				+after applying a random transposition on the current permutation, either
			
 
				+the discrepancy of the corresponding Halton set is decreased and the 
			
 
				+evolution is kept, or it is not decreased but the evolution is still kept with 
			
 
				 a probability $e^{\frac{\delta}{T}}$ where $\delta$ is the difference
			
 
				 between the old and new discrepancy, and $T$ the current temperature.
			
 
				 The all algorithm is described in the flowchart~\ref{flow_rec}.
			
@@ -205,30 +219,56 @@ The all algorithm is described in the flowchart~\ref{flow_rec}.
 
				 \end{figure}
			
 
				 
			
 
				 \subsubsection{Dependence on the temperature}
			
 
				-First exeriements were made to select the best initial temperature.
			
 
				+First experiments were made to select the best initial temperature.
			
 
				+Results are compiled in graphs~\ref{temp_2},~\ref{temp3} and~\ref{temp3_z}.
			
 
				+Graphs~\ref{temp_2} and~\ref{temp3} represent the results obtained respectively
			
 
				+in dimension 2 and 3 between 10 and 500 points. The curve obtained is 
			
 
				+characteristic of the average evolution of the discrepancy optimization 
			
 
				+algorithms for Halton point sets: a very fast decrease for a low number of 
			
 
				+points --- roughly up to 80 points --- and then a very slow one 
			
 
				+after~\cite{Doerr}.
			
 
				+The most interesting part of these results is concentrated between 80 and 160
			
 
				+points, where the different curves split. The graph~\ref{temp3_z} is a zoom 
			
 
				+of~\ref{temp3} in this window. We remark on that graph that the lower the 
			
 
				+temperature is, the better the results are.
			
 
				+
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				-\includegraphics[scale=0.3]{Results/resu_temp3.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\includegraphics[scale=0.3]{Results/resu_2_temp.png}
			
 
				+\caption{Dependence on initial temperature: D=2}
			
 
				+  \label{temp_2}
			
 
				 \end{figure}
			
 
				 
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				-\includegraphics[scale=0.3]{Results/resu_temp3_zoom.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\includegraphics[scale=0.3]{Results/resu_temp3.png}
			
 
				+\caption{Dependence on initial temperature: D=3}
			
 
				+  \label{temp3}
			
 
				 \end{figure}
			
 
				+
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				-\includegraphics[scale=0.3]{Results/resu_2_temp.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\includegraphics[scale=0.3]{Results/resu_temp3_zoom.png}
			
 
				+\caption{Dependence on initial temperature (zoom): D=3}
			
 
				+  \label{temp3_z}
			
 
				 \end{figure}
			
 
				 
			
 
				+
			
 
				 \subsubsection{Stability with regards to the number of iterations}
			
 
				 
			
 
				+As for the fully random search heuristic we investigated the stability
			
 
				+of the algorithm with regards to the number of iterations. We present here
			
 
				+the result in dimension 3 in the graph~\ref{iter_sa}. Once again we
			
 
				+restricted the window between 80 and 180 points, where the curves split.
			
 
				+An interesting phenomenon can be observed: the error rates are somehow 
			
 
				+invariant w.r.t.\ the number of iterations and once again the 1000 iterations
			
 
				+threshold seems to appear --- point 145 is a light split between iteration 
			
 
				+1600 and the others, but except for that point, running more than 1000
			
 
				+iterations tends to be a waste of time. The error rate is for 80 points the
			
 
				+biggest and is about $15\%$ of the value, which is similar to the error
			
 
				+rates for fully random search with 400 iterations.
			
 
				+
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				 \includegraphics[scale=0.3]{Results/sa_iter.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\caption{Dependence on iterations number for simulated annealing: D=3}
			
 
				+  \label{iter_sa}
			
 
				 \end{figure}
			
 
				 
			
 
				 \subsection{Genetic (5+5) search}
			
@@ -269,5 +309,6 @@ First exeriements were made to select the best initial temperature.
 
				 
			
 
				 
			
 
				 \section{Conclusion}
			
 
				-
			
 
				+\bibliographystyle{alpha}
			
 
				+\bibliography{bi}
			
 
				 \end{document}
			
--- a/main.tex.bak
+++ b/main.tex.bak
@@ -48,19 +48,19 @@ Experiments were conducted on two machines:
 
				 \end{itemize}
			
 
				 
			
 
				 \begin{figure}
			
 
				-  \label{main_flow}
			
 
				 \includegraphics[scale=0.6]{main_flow.pdf}
			
 
				 \caption{Tool overview}
			
 
				+  \label{main_flow}
			
 
				 \end{figure}
			
 
				 
			
 
				 On these machines, some basic profiling has make clear that 
			
 
				 the main bottleneck of the computations is hiding in the \emph{computation
			
 
				-of the discrepency}. The chosen algorithm and implantation of this 
			
 
				+of the discrepancy}. The chosen algorithm and implantation of this 
			
 
				 cost function is the DEM-algorithm of \emph{Magnus Wahlstr\o m}.\medskip
			
 
				 
			
 
				 All the experiments has been conducted on dimension 2,3,4 
			
 
				 --- with a fixed Halton basis 7, 13, 29, 3 ---. Some minor tests have
			
 
				-been made in order to discuss the dependency of the discrepency and 
			
 
				+been made in order to discuss the dependency of the discrepancy and 
			
 
				 efficiency of the heuristics with regards to the values chosen for the
			
 
				 prime base. The average results remains roughly identical when taking 
			
 
				 changing these primes and taking them in the range [2, 100]. For such
			
@@ -78,26 +78,30 @@ extremal values are also given in order to construct error bands graphs.
 
				 
			
 
				 A flowchart of the conduct of one experiment is described in the 
			
 
				 flowchart~\ref{insight_flow}. The number of iteration of the heuristic is 
			
 
				-I and the number of full restart is N. Th efunction Heuristic() correspond to
			
 
				-a single step of the chosen heursitic. We now present an in-depth view of
			
 
				+I and the number of full restarts is N. The function Heuristic() corresponds to
			
 
				+a single step of the chosen heuristic. We now present an in-depth view of
			
 
				 the implemented heuristics.
			
 
				 
			
 
				 \begin{figure}
			
 
				  \begin{mdframed}
			
 
				-  \label{insight_flow}
			
 
				 \includegraphics[scale=0.4]{insight.pdf}
			
 
				 \caption{Flowchart of a single experiment}
			
 
				+\label{insight_flow}
			
 
				 \end{mdframed}
			
 
				 \end{figure}
			
 
				 
			
 
				+Graphs are presented not with the usual ``box-and-whisker'' plots to show the 
			
 
				+error bounds, but in a more graphical way with error bands. The graph
			
 
				+of the mean result is included inside a band of the same color which
			
 
				+represents the incertitude with regards to the values obtained.
			
 
				 
			
 
				 \section{Heuristics developed}
			
 
				 
			
 
				 \subsection{Fully random search (Test case)}
			
 
				- The first heuristic implemented is rge random search. We generates
			
 
				+ The first heuristic implemented is the random search. We generates
			
 
				  random sets of Halton points and select the best set with regard to its
			
 
				  discrepancy iteratively. The process is wrapped up in the 
			
 
				- flowchart~\ref{rand_flow}. In order to generate at each step a random 
			
 
				+ flowchart~\ref{random_flow}. In order to generate at each step a random 
			
 
				  permutation, we transform it directly from the previous one.
			
 
				   More precisely the permutation is a singleton object which have method 
			
 
				   random, built on the Knuth Fisher Yates shuffle. This algorithm allows
			
@@ -105,9 +109,9 @@ the implemented heuristics.
 
				   this fact and detail the algorithm in the following section.
			
 
				 \begin{figure}
			
 
				  \begin{mdframed}
			
 
				-  \label{rand_flow}
			
 
				 \includegraphics[scale=0.4]{flow_rand.pdf}
			
 
				 \caption{Flowchart of the random search}
			
 
				+  \label{random_flow}
			
 
				 \end{mdframed}
			
 
				 \end{figure}
			
 
				 
			
@@ -155,66 +159,113 @@ the naive implementation.
 
				 
			
 
				 
			
 
				 \subsubsection{Results and stability}
			
 
				-
			
 
				 We first want to analyze the dependence of the results on the number of 
			
 
				 iterations of the heuristic, in order to discuss its stability. 
			
 
				-The results are compiled in the figures~\ref{rand_iter2}\ref{rand_iter3}.
			
 
				+The results are compiled in the figures~\ref{rand_iter2},~\ref{rand_iter3},
			
 
				+restricted to a number of points between 80 and 180.
			
 
				+We emphasize that a lot of data appears on the graphs, 
			
 
				+and the error bands representation make them a bit messy. These graphs
			
 
				+were made for extensive internal experiments and parameters researches.
			
 
				+The final wrap up graphs are much more lighter and only presents the best 
			
 
				+results obtained.
			
 
				 As expected from a fully random search, the error bands are very large for 
			
 
				-low number of iterations ($15\%$ of the value for 200 iterations) and tends
			
 
				-to shrink with a bigger number of iterations (arround $5\%$ for 1600 iterations).
			
 
				+low number of iterations ($15\%$ of the value for 400 iterations) and tends
			
 
				+to shrink with a bigger number of iterations (around $5\%$ for 1600 iterations).
			
 
				+This shrinkage is a direct consequence of well known concentrations bounds
			
 
				+(Chernoff and Azuma--Hoeffding).
			
 
				 The average results are quite stable, they decrease progressively with 
			
 
				-the growing number of iterations, but seems to get to a limits after 1000 iterations. This value acts as a threshold for the interesting number of iterations.
			
 
				+the growing number of iterations, but seems to get to a limits after 1000 
			
 
				+iterations. This value acts as a threshold for the interesting number of iterations.
			
 
				 As such interesting results can be conducted with \emph{only} 1000 iterations, 
			
 
				-without alterating too much the quality of the set with regards to its
			
 
				-discrepency and this heuristic.
			
 
				+without altering too much the quality of the set with regards to its
			
 
				+discrepancy and this heuristic.
			
 
				 
			
 
				 \begin{figure}
			
 
				-  \label{rand_iter2}
			
 
				 \includegraphics[scale=0.3]{Results/random_iter.png}
			
 
				-\caption{Dependence on iterations number: D=2}
			
 
				+\caption{Dependence on iterations, dimension 2}
			
 
				+\label{rand_iter2}
			
 
				 \end{figure}
			
 
				 \begin{figure}
			
 
				-  \label{rand_iter3}
			
 
				 \includegraphics[scale=0.3]{Results/random_iter_3.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\caption{Dependence on iterations, dimension 3}
			
 
				+\label{rand_iter3}
			
 
				 \end{figure}
			
 
				 
			
 
				-
			
 
				-
			
 
				 \subsection{Evolutionary heuristic: Simulated annealing and local search}
			
 
				+The second heuristic implemented is a randomized local search with 
			
 
				+simulated annealing. This heuristic is inspired by the physical 
			
 
				+process of annealing in metallurgy.
			
 
				+Simulated annealing interprets the physical slow cooling as a 
			
 
				+slow decrease in the probability of accepting worse solutions as it 
			
 
				+explores the solution space. 
			
 
				+More precisely the neighbours are here the permutations which can be obtained
			
 
				+by application of exactly one transposition of the current permutation.
			
 
				+The selection phase is dependant on the current temperature:
			
 
				+after applying a random transposition on the current permutation, either
			
 
				+the discrepancy of the corresponding Halton set is decreased and the 
			
 
				+evolution is kept, or it is not decreased but the evolution is still kept with 
			
 
				+a probability $e^{\frac{\delta}{T}}$ where $\delta$ is the difference
			
 
				+between the old and new discrepancy, and $T$ the current temperature.
			
 
				+The all algorithm is described in the flowchart~\ref{flow_rec}.
			
 
				+
			
 
				 \begin{figure}
			
 
				  \begin{mdframed}
			
 
				-  \label{rand_flow}
			
 
				 \includegraphics[scale=0.4]{flow_recuit.pdf}
			
 
				 \caption{Flowchart of the simulated annealing local search heuristic}
			
 
				+\label{flow_rec}
			
 
				 \end{mdframed}
			
 
				 \end{figure}
			
 
				 
			
 
				 \subsubsection{Dependence on the temperature}
			
 
				+First experiments were made to select the best initial temperature.
			
 
				+Results are compiled in graphs~\ref{temp_2},~\ref{temp3},\ref{temp3_z}.
			
 
				+Graphs~\ref{temp_2},~\ref{temp3} represents the results obtained respectively
			
 
				+in dimension 2 and 3 between 10 and 500 points. The curve obtained is 
			
 
				+characteristic of the average evolution of the discrepancy optimisation 
			
 
				+algorithms for Halton points sets: a very fast decrease for low number of 
			
 
				+points --- roughly up to 80 points --- and then a very slow one after.
			
 
				+The most interesting part of these results is concentrated between 80 and 160
			
 
				+points were the different curves splits. The graph~\ref{temp3_z} is a zoom 
			
 
				+of~\ref{temp3} in this window. We remark on that graph that the lower the 
			
 
				+temperature is, the best the results are.
			
 
				 
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				-\includegraphics[scale=0.3]{Results/resu_temp3.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\includegraphics[scale=0.3]{Results/resu_2_temp.png}
			
 
				+\caption{Dependence on initial temperature: D=2}
			
 
				+  \label{temp_2}
			
 
				 \end{figure}
			
 
				 
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				-\includegraphics[scale=0.3]{Results/resu_temp3_zoom.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\includegraphics[scale=0.3]{Results/resu_temp3.png}
			
 
				+\caption{Dependence on initial temperature: D=3}
			
 
				+  \label{temp3}
			
 
				 \end{figure}
			
 
				+
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				-\includegraphics[scale=0.3]{Results/resu_2_temp.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\includegraphics[scale=0.3]{Results/resu_temp3_zoom.png}
			
 
				+\caption{Dependence on initial temperature (zoom): D=3}
			
 
				+  \label{temp3_z}
			
 
				 \end{figure}
			
 
				 
			
 
				+
			
 
				 \subsubsection{Stability with regards to the number of iterations}
			
 
				 
			
 
				+As for the fully random search heuristic we investigated the stability
			
 
				+of the algorithm with regards to the number of iterations. We present here
			
 
				+the result in dimension 3 in the graph~\ref{iter_sa}. Once again we
			
 
				+restricted the window between 80 and 180 points, where the curves split.
			
 
				+An interesting phenomena can be observed: the error rates are somehow 
			
 
				+invariant w.r.t.\ the number of iteration and once again the 1000 iterations
			
 
				+threshold seems to appear --- point 145 is a light split between iteration 
			
 
				+1600 and the others, but except for that point, running more than 1000
			
 
				+iterations tends to be a waste of time. The error rate is for 80 points the
			
 
				+biggest and is about $15\%$ of the value, which is similar to the error
			
 
				+rates for fully random search with 400 iterations.
			
 
				+
			
 
				 \begin{figure}
			
 
				-  \label{rand_flow}
			
 
				 \includegraphics[scale=0.3]{Results/sa_iter.png}
			
 
				-\caption{Dependence on iterations number: D=3}
			
 
				+\caption{Dependence on iterations number for simulated annealing: D=3}
			
 
				+  \label{iter_sa}
			
 
				 \end{figure}
			
 
				 
			
 
				 \subsection{Genetic (5+5) search}