On Wed, 1 Apr 2026 16:34:47 +0300
Michael S <[email protected]> wrote:
<snip>
My implementation #1.
It is based on the proof provided by somebody with abstract math
attitude.
He said that his proof is something that the 5th grade schoolchildren
could find with in 5 minutes.
I am no 5th pupil any longer. Even understanding of his proof took
me significantly longer than 5 minutes :(
Here is my attempt of translation (original proof was not in English):
"Solution would be done by induction by number of cookies of each sort
(the same for all sorts).
For K=2 the solution is simple. Please figure it out yourself.
Now let's take K > 2 and postulate that for numbers of cookies per box
that are smaller than K the assertion already proven.
Let's take some disposition for which the assertion is correct and
remove the corresponding "thread" (i.e. one cookie from each box, all
of different sorts). Then supposition of induction is correct for
remaining disposition, which means that we can remove yet another
thread, then yet another etc.. for a total of K threads. That is,
for K>2 there are more than 2 threads.
Now let's exchange places between any 2 two cookies from different
boxes. Since by doing so we are touching 1 or 2 threads then there
still be at least one thread untouched (at least k-2 threads, actually [M.S.]). It means that the property "we can remove a thread" is
not disturbed by our action (i.e. by exchange of two cookies).
What is left it is to realize that by means of such exchanges any
disposition can be mutated from any other disposition.
That's all."
And here is a code. It is not very fast. But the speed is almost the
same on average and in the worst case.
#include <stdint.h>
#include <stdbool.h>
#include "solver.h"
peek_t solver(boxes_t boxes)
{
uint8_t na[N_BOXES] = {0};
typedef struct { uint8_t s,d; } xch_t;
xch_t xch[N_BOXES*BOX_SIZE];
int i = 0;
for (int src_bi = 0; src_bi < N_BOXES; ++src_bi) {
for (int src_ci = na[src_bi]; src_ci < BOX_SIZE; ++src_ci) {
uint8_t dst_bi = boxes.b[src_bi][src_ci];
while (dst_bi != src_bi) {
uint8_t dst_ci = na[dst_bi];
uint8_t dst_v = boxes.b[dst_bi][dst_ci];
na[dst_bi] = dst_ci+1;
xch[i++] = (xch_t){ .s=src_bi, .d=dst_bi};
dst_bi = dst_v;
}
xch[i++] = (xch_t){ .s=src_bi, .d=dst_bi};
}
na[src_bi] = BOX_SIZE;
}
for (int bi = 0; bi < N_BOXES; ++bi)
for (int ci = 0; ci < BOX_SIZE; ++ci)
boxes.b[bi][ci] = bi;
uint8_t tx[N_BOXES][BOX_SIZE];
for (int bi = 0; bi < N_BOXES; ++bi)
for (int ci = 0; ci < BOX_SIZE; ++ci)
tx[bi][ci] = ci;
peek_t threads[BOX_SIZE];
for (int ci = 0; ci < BOX_SIZE; ++ci)
for (int bi = 0; bi < N_BOXES; ++bi)
threads[ci].b[bi] = ci;
for (int i = N_BOXES*BOX_SIZE-1; i >= 0; --i) {
uint8_t src_bi = xch[i].s;
uint8_t dst_bi = xch[i].d;
uint8_t dst_ci = na[dst_bi] - 1;
na[dst_bi] = dst_ci;
if (src_bi != dst_bi) {
uint8_t src_ci = na[src_bi];
uint8_t src_v = boxes.b[src_bi][src_ci];
if (src_v != dst_bi) {
uint8_t src_t = tx[src_bi][src_ci];
uint8_t dst_t = tx[dst_bi][dst_ci];
if (src_t != dst_t) {
// 2 threads broken. Fix
uint8_t v2bi[N_BOXES];
for (int bi = 0; bi < N_BOXES; ++bi)
v2bi[boxes.b[bi][threads[src_t].b[bi]]] = bi;
boxes.b[src_bi][src_ci] = dst_bi;
boxes.b[dst_bi][dst_ci] = src_v;
for (uint8_t v = dst_bi; v != src_v;) {
uint8_t bi = v2bi[v];
uint8_t c0 = threads[src_t].b[bi];
uint8_t c1 = threads[dst_t].b[bi];
threads[src_t].b[bi] = c1;
threads[dst_t].b[bi] = c0;
tx[bi][c1] = src_t;
tx[bi][c0] = dst_t;
v = boxes.b[bi][c1];
}
} else {
boxes.b[src_bi][src_ci] = dst_bi;
boxes.b[dst_bi][dst_ci] = src_v;
}
}
}
}
return threads[0];
}
On Wed, 1 Apr 2026 16:34:47 +0300
Michael S <[email protected]> wrote:
<snip>
My solution #2.
That is my own, with no proof preceding code. It went in other direction
- from hacking the code to proving it later.
It is pretty fast. Can be made faster yet by replication of some code
and re-arrangement in the find() routine, but for purposes of
illustration I wanted it short.
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "solver.h"
static
uint8_t solver_find(
const uint8_t b_idx[N_BOXES][N_BOXES],
uint8_t result[N_BOXES],
const uint8_t free[N_BOXES],
int n_free,
uint8_t v,
const uint8_t bx[N_BOXES][BOX_SIZE])
{
typedef struct {
uint8_t parent, bi;
} db_rec;
db_rec db[N_BOXES];
bool valid[N_BOXES] = {0};
uint8_t que[N_BOXES], *wr = que, *rd = que;
valid[v] = true;
*wr++ = v;
for (;;) {
uint8_t vv = *rd++;
for (int i = 0; i < n_free; ++i) {
uint8_t bi = free[i];
uint8_t ci = b_idx[bi][vv];
if (ci != BOX_SIZE) {
result[bi] = ci;
while (vv != v) {
bi = db[vv].bi;
vv = db[vv].parent;
result[bi] = b_idx[bi][vv];
};
return i;
}
}
// add neighbors to database
for (int bi = 0; bi < N_BOXES; ++bi) {
if (b_idx[bi][vv] != BOX_SIZE) {
uint8_t a = bx[bi][result[bi]];
if (!valid[a]) {
valid[a] = true;
db[a] = (db_rec){ .parent = vv, .bi = bi };
*wr++ = a;
}
}
}
}
}
peek_t solver(boxes_t boxes)
{
// build index
uint8_t b_idx[N_BOXES][N_BOXES];
memset(b_idx, BOX_SIZE, sizeof(b_idx));
for (int bi = 0; bi < N_BOXES; ++bi) {
for (int ci = 0; ci < BOX_SIZE; ++ci) {
uint8_t v = boxes.b[bi][ci];
if (b_idx[bi][v] == BOX_SIZE)
b_idx[bi][v] = ci;
}
}
uint8_t free_boxes[N_BOXES];
for (int bi = 0; bi < N_BOXES; ++bi)
free_boxes[bi] = bi;
int n_free = N_BOXES;
peek_t result = {0};
bool used_sorts[N_BOXES]={0};
while (n_free > 0) {
int bi = free_boxes[--n_free]; // new set
result.b[bi] = 0;
uint8_t v = boxes.b[bi][0];
used_sorts[v] = true;
uint8_t pend_que[N_BOXES],
*pend_wr = pend_que, *pend_rd = pend_que;
*pend_wr++ = bi;
while (pend_wr != pend_rd && n_free != 0) {
int bk = *pend_rd++;
for (int ci = 0; ci < BOX_SIZE; ++ci) {
v = boxes.b[bk][ci];
if (!used_sorts[v]) {
// new value in set
uint8_t v_fi = solver_find(
b_idx, result.b, free_boxes,
n_free, v, boxes.b);
uint8_t v_bi = free_boxes[v_fi];
used_sorts[v] = true;
*pend_wr++ = v_bi;
free_boxes[v_fi] = free_boxes[--n_free];
// remove from free list
}
}
}
}
return result;
}
On Wed, 08 Apr 2026 08:42:11 -0700
Tim Rentsch <[email protected]> wrote:
Michael S <[email protected]> writes:
[...]
Here I am again, ready now to write a more substantive response.
So, here is the challenge:
There are 20 sorts of cookies, 10 of each sort. They are placed in
20 boxes, again, 10 cookies in each boxes. You have to take exactly
one cookie from each box, all 20 of different sorts.
Smart math heads proved that for any distribution a solution exists.
I don't ask you to repeat the proof. Just peek cookies!
I think the problem is interesting. I thought I would try coding
something up.
It's obvious that one can find the solution by exhausting. Don't do
it. Indeed, the number of possible peek orders is finite, but ii is
large - 2.4e18.
Be smarter.
On modern PC I want to get a solution in less than 1 msec, bonus for
less than 50usec.
[.. C code to drive an exercise a proposed solution ..]
You have to implement function solver() in module solver.c
I ran into trouble when I tried to work within the posted framework
to drive a solver. The difficulties were of several kinds. The
code didn't compile in my standard compilation environment, which
is roughly this (the c99 might be c11 but that made no difference):
gcc -x c -std=c99 -pedantic ...
What were the problems?
My gcc 14.2.0 on msys2 produces no warnings or errors with above flags.
I was able to work through that problem but it was irksome. The
second difficulty is the interface to solver() seems rather clunky
to me, maybe not for the input but for the output, and it wasn't
clear to me what the specification is for the output (I did manage
to figure this out but only much later). I was having to put more
effort into figuring out the interface than solving the problem.
Yes, I should have explained it, at least briefly.
I'd guess that you expected that values in result vector correspond to
sorts of cookies, when in fact they are indices to position of the
cookie in the box.
I am sorry.
Finally I abandoned the idea of working within the structure of the
posted driver code and wrote a solver to my own specification.
Doing that was fairly straightforward and took half an hour or so
(disclaimer: after I had spent a good amount of time just thinking
about the problem). After writing the solver I reimplemented a
driver framework to drive it, conforming to the interface I was
using. Some debugging was needed to get everything working. I did
some rudimentary time measurements for the solver. Results were
good (more about the specifics later).
After doing the new solver I went back to the posted driver code,
and grafted together the new solver with the orginal driver. Some
glue code was needed to reconcile the differences in interface
between the new solver and the original driver. Then I needed to
figure out the semantics of the output, which were different than
what I expected and originally understood. I figured all that out
and got things working. Sadly the code was uglier than I would have
liked.
After that, I talked to a friend to try a different approach to
producing a solution. After some light editing by myself -- mostly
just formatting and some name changes -- the code below was put into
the hopper. (Note: I claim no credit for code below. I did some
editing to make it more readable but beyond that none of it is a
result of my efforts.)
#include <stdint.h>
#include "solver.h"
static uint32_t adjacent[ N_BOXES ];
static int cookie_of_box[ N_BOXES ];
static int box_of_cookie[ N_BOXES ];
static uint32_t seen;
static int
search( int b ){
uint32_t m = adjacent[b];
while( m ){
uint32_t bit = m & -m;
m ^= bit;
int c = __builtin_ctz( bit );
if( seen & 1u<<c ) continue;
seen |= 1u<<c;
if( box_of_cookie[c] == -1 || search( box_of_cookie[c]
) ){ box_of_cookie[c] = b;
cookie_of_box[b] = c;
return 1;
}
}
return 0;
}
peek_t
solver( boxes_t boxes ){
peek_t res;
for( int i = 0; i < N_BOXES; i++ ){
uint32_t m = 0;
for( int k = 0; k < BOX_SIZE; k++ ){
m |= 1u << boxes.b[i][k];
}
adjacent[i] = m;
cookie_of_box[i] = -1;
box_of_cookie[i] = -1;
}
for( int i = 0; i < N_BOXES; i++ ){
seen = 0;
search( i );
}
for( int i = 0; i < N_BOXES; i++ ){
int c = cookie_of_box[i];
for( int k = 0; k < BOX_SIZE; k++ ){
if( boxes.b[i][k] == c ){
res.b[i] = k;
break;
}
}
}
return res;
}
Despite being derived independently, this code uses the same sort of
approach that I had used, except my code was recursive rather than
iterative.
Running the above code with default settings (128 11) produced this
output
o.k.
med=0.003542 msec, min=0.003487 msec, max=0.003911 msec
The timing results for my second code effort were similar, except
that the value for max was about 0.3 msec.
My defaults were chosen for much slower solutions.
For fast solutions like yours or mine with default parameters an
overhead of clock_gettime() call is too significant.
In such case it's better to use rep1 of at least 10000.
Informal timing measurements on my original solver -- in particular
using a different interface, and done simply by using 'time' in the
shell to measure a million calls to the solver -- gave per-call
times that were better by about a factor of five. I need to be
clear that this solver does not conform to the original interface,
and probably most of speedup is due to choosing an interface that
gives an easier fit to the solver.
It's not something I'd expect.
I used "by value" type of interface both for input and for output in
order to reduce a chance of buggy solutions to mess with the test bench.
I fully expect that "by reference" type of interface could be somewhat faster. May be, 1.5x faster for very fast solutions. But I certainly
don't expect a factor of five.
Sorry not to have something better for you. It was a fair amount of
work to produce the results above.
I read your code briefly, but didn't try to understand it yet.
I plugged it into my test bench and measured speed on very old home PC
(Intel i5-3450). With big values of rep1 it was ~3.3 times faster
than my original solution and ~1.9 times slower than my 2nd solution.
So, speed-wise your solution is good.
One thing that I don't particularly like after taking a glance - it
does not appear to be thread-safe. But it is probably very easy to make
it thread safe.
Another thing that I don't particularly like - if we want N_BOXES > 32
then it requires modifications; if we want N_BOXES > 64 then it
requires more serious modifications.
Now few words about my own solutions.
1. Original solution:
This solution is based on the proof that was given to me by somebody
with abstract math mind.
2. It is solution that I found independently, for which I hopefully
have a proof in the head, but I didn't write it out yet and didn't show
yet to said mathematician. It would not surprise me if idea of these solution is the same as yours.
Later today (tonight) I plan to post both solutions here.
Michael S <[email protected]> writes:
On Wed, 08 Apr 2026 08:42:11 -0700
Tim Rentsch <[email protected]> wrote:
Michael S <[email protected]> writes:
[...]
Here I am again, ready now to write a more substantive response.
So, here is the challenge:
There are 20 sorts of cookies, 10 of each sort. They are placed
in 20 boxes, again, 10 cookies in each boxes. You have to take
exactly one cookie from each box, all 20 of different sorts.
Smart math heads proved that for any distribution a solution
exists. I don't ask you to repeat the proof. Just peek cookies!
I think the problem is interesting. I thought I would try coding
something up.
It's obvious that one can find the solution by exhausting. Don't
do it. Indeed, the number of possible peek orders is finite, but
ii is large - 2.4e18.
Be smarter.
On modern PC I want to get a solution in less than 1 msec, bonus
for less than 50usec.
[.. C code to drive an exercise a proposed solution ..]
You have to implement function solver() in module solver.c
I ran into trouble when I tried to work within the posted framework
to drive a solver. The difficulties were of several kinds. The
code didn't compile in my standard compilation environment, which
is roughly this (the c99 might be c11 but that made no difference):
gcc -x c -std=c99 -pedantic ...
What were the problems?
My gcc 14.2.0 on msys2 produces no warnings or errors with above
flags.
These source lines
struct timespec t0;
clock_gettime(CLOCK_MONOTONIC, &t0);
produced these diagnostics
tb.c:134:21: error: storage size of 't0' isn't known
tb.c:135:5: warning: implicit declaration of function
'clock_gettime' [...] tb.c:135:19: error: 'CLOCK_MONOTONIC'
undeclared (first use in this function)
under -std=c99. Apparently the first diagnostic, about
struct timespec, is fixed under -std=c11. It wasn't hard
to fix these, but it was irksome.
I was able to work through that problem but it was irksome. The
second difficulty is the interface to solver() seems rather clunky
to me, maybe not for the input but for the output, and it wasn't
clear to me what the specification is for the output (I did manage
to figure this out but only much later). I was having to put more
effort into figuring out the interface than solving the problem.
Yes, I should have explained it, at least briefly.
I'd guess that you expected that values in result vector correspond
to sorts of cookies, when in fact they are indices to position of
the cookie in the box.
I am sorry.
Yes, that is in fact what I was thinking. Fortunately the difficulty
was resolved when the checker reported a value out of range.
Finally I abandoned the idea of working within the structure of the
posted driver code and wrote a solver to my own specification.
Doing that was fairly straightforward and took half an hour or so
(disclaimer: after I had spent a good amount of time just thinking
about the problem). After writing the solver I reimplemented a
driver framework to drive it, conforming to the interface I was
using. Some debugging was needed to get everything working. I did
some rudimentary time measurements for the solver. Results were
good (more about the specifics later).
After doing the new solver I went back to the posted driver code,
and grafted together the new solver with the orginal driver. Some
glue code was needed to reconcile the differences in interface
between the new solver and the original driver. Then I needed to
figure out the semantics of the output, which were different than
what I expected and originally understood. I figured all that out
and got things working. Sadly the code was uglier than I would
have liked.
After that, I talked to a friend to try a different approach to
producing a solution. After some light editing by myself -- mostly
just formatting and some name changes -- the code below was put
into the hopper. (Note: I claim no credit for code below. I did
some editing to make it more readable but beyond that none of it
is a result of my efforts.)
#include <stdint.h>
#include "solver.h"
static uint32_t adjacent[ N_BOXES ];
static int cookie_of_box[ N_BOXES ];
static int box_of_cookie[ N_BOXES ];
static uint32_t seen;
static int
search( int b ){
uint32_t m = adjacent[b];
while( m ){
uint32_t bit = m & -m;
m ^= bit;
int c = __builtin_ctz( bit );
if( seen & 1u<<c ) continue;
seen |= 1u<<c;
if( box_of_cookie[c] == -1 || search(
box_of_cookie[c] ) ){ box_of_cookie[c] = b;
cookie_of_box[b] = c;
return 1;
}
}
return 0;
}
peek_t
solver( boxes_t boxes ){
peek_t res;
for( int i = 0; i < N_BOXES; i++ ){
uint32_t m = 0;
for( int k = 0; k < BOX_SIZE; k++ ){
m |= 1u << boxes.b[i][k];
}
adjacent[i] = m;
cookie_of_box[i] = -1;
box_of_cookie[i] = -1;
}
for( int i = 0; i < N_BOXES; i++ ){
seen = 0;
search( i );
}
for( int i = 0; i < N_BOXES; i++ ){
int c = cookie_of_box[i];
for( int k = 0; k < BOX_SIZE; k++ ){
if( boxes.b[i][k] == c ){
res.b[i] = k;
break;
}
}
}
return res;
}
Despite being derived independently, this code uses the same sort
of approach that I had used, except my code was recursive rather
than iterative.
Running the above code with default settings (128 11) produced this
output
o.k.
med=0.003542 msec, min=0.003487 msec, max=0.003911 msec
The timing results for my second code effort were similar, except
that the value for max was about 0.3 msec.
My defaults were chosen for much slower solutions.
For fast solutions like yours or mine with default parameters an
overhead of clock_gettime() call is too significant.
In such case it's better to use rep1 of at least 10000.
It turns out that running with a larger number of trials didn't
change the results much. I think my averages got a bit worse and my
worst case got a bit better, but as I recall the difference wasn't
anything dramatic.
Informal timing measurements on my original solver -- in particular
using a different interface, and done simply by using 'time' in the
shell to measure a million calls to the solver -- gave per-call
times that were better by about a factor of five. I need to be
clear that this solver does not conform to the original interface,
and probably most of speedup is due to choosing an interface that
gives an easier fit to the solver.
It's not something I'd expect.
I used "by value" type of interface both for input and for output in
order to reduce a chance of buggy solutions to mess with the test
bench. I fully expect that "by reference" type of interface could
be somewhat faster. May be, 1.5x faster for very fast solutions.
But I certainly don't expect a factor of five.
I will try to explain below.
Sorry not to have something better for you. It was a fair amount
of work to produce the results above.
I read your code briefly, but didn't try to understand it yet.
I need to say again that the code I posted was not code that I wrote.
The approach used is similar to code I did write, but the actual code
is quite a bit different.
I plugged it into my test bench and measured speed on very old home
PC (Intel i5-3450). With big values of rep1 it was ~3.3 times
faster than my original solution and ~1.9 times slower than my 2nd solution. So, speed-wise your solution is good.
One thing that I don't particularly like after taking a glance - it
does not appear to be thread-safe. But it is probably very easy to
make it thread safe.
Do you mean because it is using (and changing) global objects? Yes
that is a drawback.
Another thing that I don't particularly like - if we want N_BOXES >
32 then it requires modifications; if we want N_BOXES > 64 then it requires more serious modifications.
As it turns out my code handles N_BOXES up to 52. You may laugh
when you see why.
Now few words about my own solutions.
1. Original solution:
This solution is based on the proof that was given to me by somebody
with abstract math mind.
Yes, I posted a comment about that
Michael S <[email protected]> writes:
On Wed, 08 Apr 2026 08:42:11 -0700
Tim Rentsch <[email protected]> wrote:
Michael S <[email protected]> writes:
As it turns out my code handles N_BOXES up to 52. You may laugh
when you see why.
Now few words about my own solutions.
1. Original solution:
This solution is based on the proof that was given to me by somebody
with abstract math mind.
Yes, I posted a comment about that
2. It is solution that I found independently, for which I hopefully
have a proof in the head, but I didn't write it out yet and didn't
show yet to said mathematician. It would not surprise me if idea
of these solution is the same as yours.
I don't really understand that code yet, but it looks like your
code is a bit fancier than mine (the code that wasn't posted
because it uses a different interface).
Later today (tonight) I plan to post both solutions here.
Here is an outline of the first (non-interface-compatible) code that
I wrote.
Use a straightforward backtracking scheme. Make a choice at the
current level, and do a recursive call to try the next level. If
the next level returns, it failed, so go on to the next choice at
this level and call again.
The output value is an array (passed recursively via pointer) with
either a cookie type for each box or a box number for each cookie
type (I don't remember which). This array is filled in as the
recursive backtrack progresses.
The whole search is started by doing a setjmp() before the call to
the top level. If the search succeeds, a longjmp() is done to
deliver an answer. That means that if the call to the top level
returns then the search was a failure. The central recursive
function has four parameters:
an Answer *'out', to hold the solution
a jmp_buf 'found', for a longjmp() when a full answer is found
a counter/index to reflect search depth (starting 0, 20 means done)
a "solution state" to direct the search (conceptually by value)
The "solution state" is at the heart of the algorithm. It's a
structure holding an array of 20 "cookie values", where a cookie
value is represented by an unsigned 64-bit int. The bits are
used as follows:
low six bits: the cookie type
next 52 bits: a "box mask", indicating at least one cookie of
this type in the corresponding box (so up to 52
boxes)
upper bits: count of boxes (ie, number of ones in the box mask)
The cookie type is important when storing a partial result in the
answer, not at any other time.
The box mask is important to know which boxes to try during the
backtracking.
The count of boxes is important to help guide the backtracking, in a
way that we hope will get an answer faster.
At each level of recursion, do the following:
If the search depth is the number of boxes, longjmp()
Next find the cookie of cookies in the solution state at or
above this level that has the smallest number of boxes. Because
the box count is held in the uppermost bits the 64-bit values can
simply be directly compared, without any masking.
Move that
cookie to the array location corresponding to the current depth
(and put the cookie that was there in the hole left by doing the
move).
For each box in the box mask of that cookie, try selecting that
box for this cookie, which means:
copy the state of cookies above this level to a new solution
state, so the changed solution state described above can be
left alone
for each of those cookies, if the cookie has the "selected box"
set in its box mask, clear the bit and reduce the box count
by one. (again since the box count is held in high order bits
this count can be reduced by just subtracting a scaled one
value.)
recurse to continue searching at the next level
If we run out of boxes to check, just return; continue trying
alternatives at the next level up, if there is one.
I think that's it. I hope it makes sense.
Michael S <[email protected]> writes:
On Wed, 1 Apr 2026 16:34:47 +0300
Michael S <[email protected]> wrote:
<snip>
My implementation #1.
It is based on the proof provided by somebody with abstract math
attitude.
He said that his proof is something that the 5th grade
schoolchildren could find with in 5 minutes.
I am no 5th pupil any longer. Even understanding of his proof took
me significantly longer than 5 minutes :(
Here is my attempt of translation (original proof was not in
English):
"Solution would be done by induction by number of cookies of each
sort (the same for all sorts).
For K=2 the solution is simple. Please figure it out yourself.
Now let's take K > 2 and postulate that for numbers of cookies per
box that are smaller than K the assertion already proven.
Let's take some disposition for which the assertion is correct and
remove the corresponding "thread" (i.e. one cookie from each box,
all of different sorts). Then supposition of induction is correct
for remaining disposition, which means that we can remove yet
another thread, then yet another etc.. for a total of K threads.
That is, for K>2 there are more than 2 threads.
Now let's exchange places between any 2 two cookies from different
boxes. Since by doing so we are touching 1 or 2 threads then there
still be at least one thread untouched (at least k-2 threads,
actually [M.S.]). It means that the property "we can remove a
thread" is not disturbed by our action (i.e. by exchange of two
cookies). What is left it is to realize that by means of such
exchanges any disposition can be mutated from any other disposition.
That's all."
I am confident that my understanding of this description, if indeed
it ever occurs, would take a good deal longer than five minutes.
Sadly I think it is representative of many of the explanations
offered by mathematical types.
And here is a code. It is not very fast. But the speed is almost
the same on average and in the worst case.
#include <stdint.h>
#include <stdbool.h>
#include "solver.h"
peek_t solver(boxes_t boxes)
{
uint8_t na[N_BOXES] = {0};
typedef struct { uint8_t s,d; } xch_t;
xch_t xch[N_BOXES*BOX_SIZE];
int i = 0;
for (int src_bi = 0; src_bi < N_BOXES; ++src_bi) {
for (int src_ci = na[src_bi]; src_ci < BOX_SIZE; ++src_ci) {
uint8_t dst_bi = boxes.b[src_bi][src_ci];
while (dst_bi != src_bi) {
uint8_t dst_ci = na[dst_bi];
uint8_t dst_v = boxes.b[dst_bi][dst_ci];
na[dst_bi] = dst_ci+1;
xch[i++] = (xch_t){ .s=src_bi, .d=dst_bi};
dst_bi = dst_v;
}
xch[i++] = (xch_t){ .s=src_bi, .d=dst_bi};
}
na[src_bi] = BOX_SIZE;
}
for (int bi = 0; bi < N_BOXES; ++bi)
for (int ci = 0; ci < BOX_SIZE; ++ci)
boxes.b[bi][ci] = bi;
uint8_t tx[N_BOXES][BOX_SIZE];
for (int bi = 0; bi < N_BOXES; ++bi)
for (int ci = 0; ci < BOX_SIZE; ++ci)
tx[bi][ci] = ci;
peek_t threads[BOX_SIZE];
for (int ci = 0; ci < BOX_SIZE; ++ci)
for (int bi = 0; bi < N_BOXES; ++bi)
threads[ci].b[bi] = ci;
for (int i = N_BOXES*BOX_SIZE-1; i >= 0; --i) {
uint8_t src_bi = xch[i].s;
uint8_t dst_bi = xch[i].d;
uint8_t dst_ci = na[dst_bi] - 1;
na[dst_bi] = dst_ci;
if (src_bi != dst_bi) {
uint8_t src_ci = na[src_bi];
uint8_t src_v = boxes.b[src_bi][src_ci];
if (src_v != dst_bi) {
uint8_t src_t = tx[src_bi][src_ci];
uint8_t dst_t = tx[dst_bi][dst_ci];
if (src_t != dst_t) {
// 2 threads broken. Fix
uint8_t v2bi[N_BOXES];
for (int bi = 0; bi < N_BOXES; ++bi)
v2bi[boxes.b[bi][threads[src_t].b[bi]]] = bi;
boxes.b[src_bi][src_ci] = dst_bi;
boxes.b[dst_bi][dst_ci] = src_v;
for (uint8_t v = dst_bi; v != src_v;) {
uint8_t bi = v2bi[v];
uint8_t c0 = threads[src_t].b[bi];
uint8_t c1 = threads[dst_t].b[bi];
threads[src_t].b[bi] = c1;
threads[dst_t].b[bi] = c0;
tx[bi][c1] = src_t;
tx[bi][c0] = dst_t;
v = boxes.b[bi][c1];
}
} else {
boxes.b[src_bi][src_ci] = dst_bi;
boxes.b[dst_bi][dst_ci] = src_v;
}
}
}
}
return threads[0];
}
In most cases I'm not a big fan of interactive debugging, but
trying to understand how this works might merit an exception.
Michael S <[email protected]> writes:
On Wed, 1 Apr 2026 16:34:47 +0300
Michael S <[email protected]> wrote:
<snip>
My solution #2.
That is my own, with no proof preceding code. It went in other
direction
- from hacking the code to proving it later.
It is pretty fast. Can be made faster yet by replication of some
code and re-arrangement in the find() routine, but for purposes of illustration I wanted it short.
Just fyi.. I did some speed comparisons between the code below and
the compatible solver that I wrote. Your code is faster, scales
better, and also has better worst-case behavior (which is to say
that my code has worse worst-case behavior). So I think the timing
numbers I was seeing before may have been just the result of being
lucky in the draw of random numbers.
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "solver.h"
static
uint8_t solver_find(
const uint8_t b_idx[N_BOXES][N_BOXES],
uint8_t result[N_BOXES],
const uint8_t free[N_BOXES],
int n_free,
uint8_t v,
const uint8_t bx[N_BOXES][BOX_SIZE])
{
typedef struct {
uint8_t parent, bi;
} db_rec;
db_rec db[N_BOXES];
bool valid[N_BOXES] = {0};
uint8_t que[N_BOXES], *wr = que, *rd = que;
valid[v] = true;
*wr++ = v;
for (;;) {
uint8_t vv = *rd++;
for (int i = 0; i < n_free; ++i) {
uint8_t bi = free[i];
uint8_t ci = b_idx[bi][vv];
if (ci != BOX_SIZE) {
result[bi] = ci;
while (vv != v) {
bi = db[vv].bi;
vv = db[vv].parent;
result[bi] = b_idx[bi][vv];
};
return i;
}
}
// add neighbors to database
for (int bi = 0; bi < N_BOXES; ++bi) {
if (b_idx[bi][vv] != BOX_SIZE) {
uint8_t a = bx[bi][result[bi]];
if (!valid[a]) {
valid[a] = true;
db[a] = (db_rec){ .parent = vv, .bi = bi };
*wr++ = a;
}
}
}
}
}
peek_t solver(boxes_t boxes)
{
// build index
uint8_t b_idx[N_BOXES][N_BOXES];
memset(b_idx, BOX_SIZE, sizeof(b_idx));
for (int bi = 0; bi < N_BOXES; ++bi) {
for (int ci = 0; ci < BOX_SIZE; ++ci) {
uint8_t v = boxes.b[bi][ci];
if (b_idx[bi][v] == BOX_SIZE)
b_idx[bi][v] = ci;
}
}
uint8_t free_boxes[N_BOXES];
for (int bi = 0; bi < N_BOXES; ++bi)
free_boxes[bi] = bi;
int n_free = N_BOXES;
peek_t result = {0};
bool used_sorts[N_BOXES]={0};
while (n_free > 0) {
int bi = free_boxes[--n_free]; // new set
result.b[bi] = 0;
uint8_t v = boxes.b[bi][0];
used_sorts[v] = true;
uint8_t pend_que[N_BOXES],
*pend_wr = pend_que, *pend_rd = pend_que;
*pend_wr++ = bi;
while (pend_wr != pend_rd && n_free != 0) {
int bk = *pend_rd++;
for (int ci = 0; ci < BOX_SIZE; ++ci) {
v = boxes.b[bk][ci];
if (!used_sorts[v]) {
// new value in set
uint8_t v_fi = solver_find(
b_idx, result.b, free_boxes,
n_free, v, boxes.b);
uint8_t v_bi = free_boxes[v_fi];
used_sorts[v] = true;
*pend_wr++ = v_bi;
free_boxes[v_fi] = free_boxes[--n_free];
// remove from free list
}
}
}
}
return result;
}
I think I could eventually understand what this code is doing, and
after that get a sense of how and why it works. In its current
form I think that would take me a fair amount of time. It would
help to have a roadmap, more evocative names, more functional
decomposition, or ideally all three. Please understand, I'm not
asking or suggesting you do anything; my intention is only to
provide feedback that may be of some benefit to you.
Tim Rentsch <[email protected]> wrote:[...]
Michael S <[email protected]> writes:
Tim Rentsch <[email protected]> wrote:
I ran into trouble when I tried to work within the posted framework
to drive a solver. The difficulties were of several kinds. The
code didn't compile in my standard compilation environment, which
is roughly this (the c99 might be c11 but that made no difference):
gcc -x c -std=c99 -pedantic ...
What were the problems?
My gcc 14.2.0 on msys2 produces no warnings or errors with above
flags.
These source lines
struct timespec t0;
clock_gettime(CLOCK_MONOTONIC, &t0);
produced these diagnostics
tb.c:134:21: error: storage size of 't0' isn't known
tb.c:135:5: warning: implicit declaration of function
'clock_gettime' [...] tb.c:135:19: error: 'CLOCK_MONOTONIC'
undeclared (first use in this function)
under -std=c99. Apparently the first diagnostic, about
struct timespec, is fixed under -std=c11. It wasn't hard
to fix these, but it was irksome.
Sounds like under Linux and with -std=c99 flag function clock_gettime()
and related structures and constants are not declared/defined by default
in time.h.
Man page suggests magic pixie dust:
#define _XOPEN_SOURCE 600
#include <time.h>
#include <unistd.h>
If you ask why I used clock_gettime() instead of C standard
timespec_get() then the answer is that on Windows, eps. on older
versions like Win7 and WS2008 (which I prefer over newer stuff for
reasons unrelated to quality of C RTL), precision of timespec_get() is
poor. clock_gettime(CLOCK_MONOTONIC,) is much better.
On Thu, 09 Apr 2026 16:37:30 -0700
Tim Rentsch <[email protected]> wrote:
Michael S <[email protected]> writes:
On Wed, 1 Apr 2026 16:34:47 +0300
Michael S <[email protected]> wrote:
<snip>
My implementation #1.
It is based on the proof provided by somebody with abstract math
attitude.
He said that his proof is something that the 5th grade
schoolchildren could find with in 5 minutes.
I am no 5th pupil any longer. Even understanding of his proof took
me significantly longer than 5 minutes :(
Here is my attempt of translation (original proof was not in
English):
"Solution would be done by induction by number of cookies of each
sort (the same for all sorts).
For K=2 the solution is simple. Please figure it out yourself.
Now let's take K > 2 and postulate that for numbers of cookies per
box that are smaller than K the assertion already proven.
Let's take some disposition for which the assertion is correct and
remove the corresponding "thread" (i.e. one cookie from each box,
all of different sorts). Then supposition of induction is correct
for remaining disposition, which means that we can remove yet
another thread, then yet another etc.. for a total of K threads.
That is, for K>2 there are more than 2 threads.
Now let's exchange places between any 2 two cookies from different
boxes. Since by doing so we are touching 1 or 2 threads then there
still be at least one thread untouched (at least k-2 threads,
actually [M.S.]). It means that the property "we can remove a
thread" is not disturbed by our action (i.e. by exchange of two
cookies). What is left it is to realize that by means of such
exchanges any disposition can be mutated from any other disposition.
That's all."
I am confident that my understanding of this description, if indeed
it ever occurs, would take a good deal longer than five minutes.
Sadly I think it is representative of many of the explanations
offered by mathematical types.
He was not fully sutisfied himself. 20 hours later he came with other formulation. Here is my attemp of translatioon:
1. There exist a disposition of cookies in boxes, for which it is
possible to select one cookie from each box, all of different sorts.
That is obvious. [MS: For example, each sort of cookies in its own dedicated box. That is a disposition that I use in my code] Let's call
the selection "thread".
2. There exists an operation that changes the disposition, but
preserves the property "it is possible to select a thread" (the
description of the operation would be given separately, a.t.m. it does
not matter).
3. By means of multiple operations of that type it is possible to turn
any arbitrary disposition to any other. Since after every step the
property "it is possible to select a thread" is preserved and because
there exist a disposition, for which this property is true, it means
that the property is true for any possible disposition.
4. The operations in question - exchange of any pair of cookies.
Comment: Induction is used only in (2) for proof that an operation
preserves the desired property.
For me, the second variant was less helpful. May be, for you it would
be more helpful.
[...code...]
In most cases I'm not a big fan of interactive debugging, but
trying to understand how this works might merit an exception.
Frankly, not all difficulties of following my code caused by the fact
that algorithm is derived from abstract proof.
Significant share of hurdles are rooted in my somewhat conflicting
desires to avoid all searches, except for inevitable search in the
innermost loop of the second pass, but at the same time to keep format
of the governing database (xch array+na array) minimalistic.
On Thu, 09 Apr 2026 21:22:51 -0700
Tim Rentsch <[email protected]> wrote:
[...]
At each level of recursion, do the following:
If the search depth is the number of boxes, longjmp()
Next find the cookie of cookies in the solution state at or
above this level that has the smallest number of boxes. Because
the box count is held in the uppermost bits the 64-bit values can
simply be directly compared, without any masking.
Is it proven that this step helps?
To me it sounds unnecessary.
[...]
I think that's it. I hope it makes sense.
It makes sense, but it looks to me as solution by exhausting, augmented
by "smallest number of boxes" heuristic.
It's not obvious [to me] without further thinking that the worst case is
not very slow.
I don't have a roadmap, but I have a variant of [approximately] the
same algorithm implemented in the style that is closer to your
preferences: implicit bookkeeping by recursion instead of explicit
lists and back-trace pointers. I even borrowed the name "seen" from the
code of your friend.
[..code..]
Here is an outline of the first (non-interface-compatible) code that
I wrote.
Use a straightforward backtracking scheme. Make a choice at the
current level, and do a recursive call to try the next level. If
the next level returns, it failed, so go on to the next choice at
this level and call again.
The output value is an array (passed recursively via pointer) with
either a cookie type for each box or a box number for each cookie
type (I don't remember which). This array is filled in as the
recursive backtrack progresses.
The whole search is started by doing a setjmp() before the call to
the top level. If the search succeeds, a longjmp() is done to
deliver an answer. That means that if the call to the top level
returns then the search was a failure. The central recursive
function has four parameters:
an Answer *'out', to hold the solution
a jmp_buf 'found', for a longjmp() when a full answer is found
a counter/index to reflect search depth (starting 0, 20 means done)
a "solution state" to direct the search (conceptually by value)
The "solution state" is at the heart of the algorithm. It's a
structure holding an array of 20 "cookie values", where a cookie
value is represented by an unsigned 64-bit int. The bits are
used as follows:
low six bits: the cookie type
next 52 bits: a "box mask", indicating at least one cookie of
this type in the corresponding box (so up to 52
boxes)
upper bits: count of boxes (ie, number of ones in the box mask)
The cookie type is important when storing a partial result in the
answer, not at any other time.
The box mask is important to know which boxes to try during the
backtracking.
The count of boxes is important to help guide the backtracking, in a
way that we hope will get an answer faster.
At each level of recursion, do the following:
If the search depth is the number of boxes, longjmp()
Next find the cookie of cookies in the solution state at or
above this level that has the smallest number of boxes. Because
the box count is held in the uppermost bits the 64-bit values can
simply be directly compared, without any masking. Move that
cookie to the array location corresponding to the current depth
(and put the cookie that was there in the hole left by doing the
move).
For each box in the box mask of that cookie, try selecting that
box for this cookie, which means:
copy the state of cookies above this level to a new solution
state, so the changed solution state described above can be
left alone
for each of those cookies, if the cookie has the "selected box"
set in its box mask, clear the bit and reduce the box count
by one. (again since the box count is held in high order bits
this count can be reduced by just subtracting a scaled one
value.)
recurse to continue searching at the next level
If we run out of boxes to check, just return; continue trying
alternatives at the next level up, if there is one.
I think that's it. I hope it makes sense.
On Thu, 09 Apr 2026 21:22:51 -0700
Tim Rentsch <[email protected]> wrote:
Here is an outline of the first (non-interface-compatible) code that
I wrote.
Use a straightforward backtracking scheme. Make a choice at the
current level, and do a recursive call to try the next level. If
the next level returns, it failed, so go on to the next choice at
this level and call again.
The output value is an array (passed recursively via pointer) with
either a cookie type for each box or a box number for each cookie
type (I don't remember which). This array is filled in as the
recursive backtrack progresses.
The whole search is started by doing a setjmp() before the call to
the top level. If the search succeeds, a longjmp() is done to
deliver an answer. That means that if the call to the top level
returns then the search was a failure. The central recursive
function has four parameters:
an Answer *'out', to hold the solution
a jmp_buf 'found', for a longjmp() when a full answer is found
a counter/index to reflect search depth (starting 0, 20 means done)
a "solution state" to direct the search (conceptually by value)
The "solution state" is at the heart of the algorithm. It's a
structure holding an array of 20 "cookie values", where a cookie
value is represented by an unsigned 64-bit int. The bits are
used as follows:
low six bits: the cookie type
next 52 bits: a "box mask", indicating at least one cookie of
this type in the corresponding box (so up to 52
boxes)
upper bits: count of boxes (ie, number of ones in the box mask)
The cookie type is important when storing a partial result in the
answer, not at any other time.
The box mask is important to know which boxes to try during the
backtracking.
The count of boxes is important to help guide the backtracking, in a
way that we hope will get an answer faster.
At each level of recursion, do the following:
If the search depth is the number of boxes, longjmp()
Next find the cookie of cookies in the solution state at or
above this level that has the smallest number of boxes. Because
the box count is held in the uppermost bits the 64-bit values can
simply be directly compared, without any masking. Move that
cookie to the array location corresponding to the current depth
(and put the cookie that was there in the hole left by doing the
move).
For each box in the box mask of that cookie, try selecting that
box for this cookie, which means:
copy the state of cookies above this level to a new solution
state, so the changed solution state described above can be
left alone
for each of those cookies, if the cookie has the "selected box"
set in its box mask, clear the bit and reduce the box count
by one. (again since the box count is held in high order bits
this count can be reduced by just subtracting a scaled one
value.)
recurse to continue searching at the next level
If we run out of boxes to check, just return; continue trying
alternatives at the next level up, if there is one.
I think that's it. I hope it makes sense.
I need clarification for the last step.
What exactly do we do after our fist guess failed?
Supposedly continue to the next sort of cookies that has the same # of
boxes as the one we just tried.
But what we do after that? Do we have to try cookies with higher number
of boxes?
If yes, does it apply in case when cookies that we tried before had
just one box?
Michael S <[email protected]> writes:
On Thu, 09 Apr 2026 21:22:51 -0700
Tim Rentsch <[email protected]> wrote:
Here is an outline of the first (non-interface-compatible) code
that I wrote.
Use a straightforward backtracking scheme. Make a choice at the
current level, and do a recursive call to try the next level. If
the next level returns, it failed, so go on to the next choice at
this level and call again.
The output value is an array (passed recursively via pointer) with
either a cookie type for each box or a box number for each cookie
type (I don't remember which). This array is filled in as the
recursive backtrack progresses.
The whole search is started by doing a setjmp() before the call to
the top level. If the search succeeds, a longjmp() is done to
deliver an answer. That means that if the call to the top level
returns then the search was a failure. The central recursive
function has four parameters:
an Answer *'out', to hold the solution
a jmp_buf 'found', for a longjmp() when a full answer is found
a counter/index to reflect search depth (starting 0, 20 means
done) a "solution state" to direct the search (conceptually by
value)
The "solution state" is at the heart of the algorithm. It's a
structure holding an array of 20 "cookie values", where a cookie
value is represented by an unsigned 64-bit int. The bits are
used as follows:
low six bits: the cookie type
next 52 bits: a "box mask", indicating at least one cookie of
this type in the corresponding box (so up to 52
boxes)
upper bits: count of boxes (ie, number of ones in the box mask)
The cookie type is important when storing a partial result in the
answer, not at any other time.
The box mask is important to know which boxes to try during the
backtracking.
The count of boxes is important to help guide the backtracking, in
a way that we hope will get an answer faster.
At each level of recursion, do the following:
If the search depth is the number of boxes, longjmp()
Next find the cookie of cookies in the solution state at or
above this level that has the smallest number of boxes. Because
the box count is held in the uppermost bits the 64-bit values
can simply be directly compared, without any masking. Move that
cookie to the array location corresponding to the current depth
(and put the cookie that was there in the hole left by doing the
move).
For each box in the box mask of that cookie, try selecting that
box for this cookie, which means:
copy the state of cookies above this level to a new solution
state, so the changed solution state described above can be
left alone
for each of those cookies, if the cookie has the "selected
box" set in its box mask, clear the bit and reduce the box count
by one. (again since the box count is held in high order
bits this count can be reduced by just subtracting a scaled one
value.)
recurse to continue searching at the next level
If we run out of boxes to check, just return; continue trying
alternatives at the next level up, if there is one.
I think that's it. I hope it makes sense.
I need clarification for the last step.
Here is a mixture of C code and pseudocode. A SolveState is an
array, one element for each cookie type.
Answer
solve( SolveState *problem ){
Answer answer;
jmp_buf found;
if( seek( &answer, found, problem ) ) return answer;
printf( "No joy\n" );
exit(0);
}
/* Note that seek() returns true if and only if a longjmp() was done.
*/
_Bool
seek( Answer *answer, jmp_buf found, SolveState *problem ){
if( setjmp( found ) ) return true;
return look( answer, found, 0, problem );
}
/* The call to look() after the if() starts the search going. */
/* If an answer is found, a longjmp() will go back to the 'return
true;' */
_Bool
look( Answer *answer, jmp_buf found, Index level, SolveState *in ){
SolveState work;
...
if( level == COOKIE_LIMIT ) longjmp( found, 1 );
... ignoring the "optimization" step of choosing the best cookie
...
for each box B in in->cookies[level] {
remember box B for the cookie in->cookies[level], in *answer
copy in->cookies[ level+1 .. COOKIE_LEVEL ) to work
remove B from each work.cookies[ level+1 .. COOKIE_LEVEL )
look( out, found, level+1, &work );
}
return false;
}
/* Note that 'true' is never returned by this function. */
/* If there are more boxes to try for the current cookie, recurse. */
/* When there are no more boxes to try, return false. */
/* If/when all of the cookies have been assigned, longjmp() */
/* Doing a longjmp() will cause seek() to return true. */
The notation ...cookies[ level+1 .. COOKIE_LEVEL ) is an implied
for() loop.
Now for your questions...
What exactly do we do after our fist guess failed?
If you mean the first box for the current cookie, go on to the
next box for that cookie.
If you mean all the boxes for the current cookie failed, simply
return.
If you mean all the boxes for the cookie on the first level, simply
return, which will cause an outer call to fail irrevocably. Under
the conditions of the problem, this never happens, if the code has
been written correctly.
Supposedly continue to the next sort of cookies that has the same #
of boxes as the one we just tried.
But what we do after that? Do we have to try cookies with higher
number of boxes?
No. At each level there is exactly one cookie to try, and we try
each of the boxes that cookie might be taken from (at this point in
the search); if none of those boxes work, backtrack -- which is to
say, return to the next outer level.
If yes, does it apply in case when cookies that we tried before had
just one box?
At each level, we are never concerned with the cookie choices that
were made at previous levels (which means a smaller level index)
as those choices are "frozen" for this part of the search.
I hope these answers clear up what you're looking for.
Michael S <[email protected]> writes:
On Thu, 09 Apr 2026 21:22:51 -0700
Tim Rentsch <[email protected]> wrote:
Here is an outline of the first (non-interface-compatible) code
that I wrote.
Use a straightforward backtracking scheme. Make a choice at the
current level, and do a recursive call to try the next level. If
the next level returns, it failed, so go on to the next choice at
this level and call again.
The output value is an array (passed recursively via pointer) with
either a cookie type for each box or a box number for each cookie
type (I don't remember which). This array is filled in as the
recursive backtrack progresses.
The whole search is started by doing a setjmp() before the call to
the top level. If the search succeeds, a longjmp() is done to
deliver an answer. That means that if the call to the top level
returns then the search was a failure. The central recursive
function has four parameters:
an Answer *'out', to hold the solution
a jmp_buf 'found', for a longjmp() when a full answer is found
a counter/index to reflect search depth (starting 0, 20 means
done) a "solution state" to direct the search (conceptually by
value)
The "solution state" is at the heart of the algorithm. It's a
structure holding an array of 20 "cookie values", where a cookie
value is represented by an unsigned 64-bit int. The bits are
used as follows:
low six bits: the cookie type
next 52 bits: a "box mask", indicating at least one cookie of
this type in the corresponding box (so up to 52
boxes)
upper bits: count of boxes (ie, number of ones in the box mask)
The cookie type is important when storing a partial result in the
answer, not at any other time.
The box mask is important to know which boxes to try during the
backtracking.
The count of boxes is important to help guide the backtracking, in
a way that we hope will get an answer faster.
At each level of recursion, do the following:
If the search depth is the number of boxes, longjmp()
Next find the cookie of cookies in the solution state at or
above this level that has the smallest number of boxes. Because
the box count is held in the uppermost bits the 64-bit values
can simply be directly compared, without any masking. Move that
cookie to the array location corresponding to the current depth
(and put the cookie that was there in the hole left by doing the
move).
For each box in the box mask of that cookie, try selecting that
box for this cookie, which means:
copy the state of cookies above this level to a new solution
state, so the changed solution state described above can be
left alone
for each of those cookies, if the cookie has the "selected
box" set in its box mask, clear the bit and reduce the box count
by one. (again since the box count is held in high order
bits this count can be reduced by just subtracting a scaled one
value.)
recurse to continue searching at the next level
If we run out of boxes to check, just return; continue trying
alternatives at the next level up, if there is one.
I think that's it. I hope it makes sense.
I need clarification for the last step.
Here is a mixture of C code and pseudocode. A SolveState is an
array, one element for each cookie type.
Answer
solve( SolveState *problem ){
Answer answer;
jmp_buf found;
if( seek( &answer, found, problem ) ) return answer;
printf( "No joy\n" );
exit(0);
}
/* Note that seek() returns true if and only if a longjmp() was done.
*/
_Bool
seek( Answer *answer, jmp_buf found, SolveState *problem ){
if( setjmp( found ) ) return true;
return look( answer, found, 0, problem );
}
/* The call to look() after the if() starts the search going. */
/* If an answer is found, a longjmp() will go back to the 'return
true;' */
_Bool
look( Answer *answer, jmp_buf found, Index level, SolveState *in ){
SolveState work;
...
if( level == COOKIE_LIMIT ) longjmp( found, 1 );
... ignoring the "optimization" step of choosing the best cookie
...
for each box B in in->cookies[level] {
remember box B for the cookie in->cookies[level], in *answer
copy in->cookies[ level+1 .. COOKIE_LEVEL ) to work
remove B from each work.cookies[ level+1 .. COOKIE_LEVEL )
look( out, found, level+1, &work );
}
return false;
}
/* Note that 'true' is never returned by this function. */
/* If there are more boxes to try for the current cookie, recurse. */
/* When there are no more boxes to try, return false. */
/* If/when all of the cookies have been assigned, longjmp() */
/* Doing a longjmp() will cause seek() to return true. */
The notation ...cookies[ level+1 .. COOKIE_LEVEL ) is an implied
for() loop.
Now for your questions...
What exactly do we do after our fist guess failed?
If you mean the first box for the current cookie, go on to the
next box for that cookie.
If you mean all the boxes for the current cookie failed, simply
return.
If you mean all the boxes for the cookie on the first level, simply
return, which will cause an outer call to fail irrevocably. Under
the conditions of the problem, this never happens, if the code has
been written correctly.
Supposedly continue to the next sort of cookies that has the same #
of boxes as the one we just tried.
But what we do after that? Do we have to try cookies with higher
number of boxes?
No. At each level there is exactly one cookie to try, and we try
each of the boxes that cookie might be taken from (at this point in
the search); if none of those boxes work, backtrack -- which is to
say, return to the next outer level.
If yes, does it apply in case when cookies that we tried before had
just one box?
At each level, we are never concerned with the cookie choices that
were made at previous levels (which means a smaller level index)
as those choices are "frozen" for this part of the search.
I hope these answers clear up what you're looking for.
Michael S <[email protected]> writes:
On Thu, 09 Apr 2026 21:22:51 -0700
Tim Rentsch <[email protected]> wrote:
Here is an outline of the first (non-interface-compatible) code
that I wrote.
Use a straightforward backtracking scheme. Make a choice at the
current level, and do a recursive call to try the next level. If
the next level returns, it failed, so go on to the next choice at
this level and call again.
The output value is an array (passed recursively via pointer) with
either a cookie type for each box or a box number for each cookie
type (I don't remember which). This array is filled in as the
recursive backtrack progresses.
The whole search is started by doing a setjmp() before the call to
the top level. If the search succeeds, a longjmp() is done to
deliver an answer. That means that if the call to the top level
returns then the search was a failure. The central recursive
function has four parameters:
an Answer *'out', to hold the solution
a jmp_buf 'found', for a longjmp() when a full answer is found
a counter/index to reflect search depth (starting 0, 20 means
done) a "solution state" to direct the search (conceptually by
value)
The "solution state" is at the heart of the algorithm. It's a
structure holding an array of 20 "cookie values", where a cookie
value is represented by an unsigned 64-bit int. The bits are
used as follows:
low six bits: the cookie type
next 52 bits: a "box mask", indicating at least one cookie of
this type in the corresponding box (so up to 52
boxes)
upper bits: count of boxes (ie, number of ones in the box mask)
The cookie type is important when storing a partial result in the
answer, not at any other time.
The box mask is important to know which boxes to try during the
backtracking.
The count of boxes is important to help guide the backtracking, in
a way that we hope will get an answer faster.
At each level of recursion, do the following:
If the search depth is the number of boxes, longjmp()
Next find the cookie of cookies in the solution state at or
above this level that has the smallest number of boxes. Because
the box count is held in the uppermost bits the 64-bit values
can simply be directly compared, without any masking. Move that
cookie to the array location corresponding to the current depth
(and put the cookie that was there in the hole left by doing the
move).
For each box in the box mask of that cookie, try selecting that
box for this cookie, which means:
copy the state of cookies above this level to a new solution
state, so the changed solution state described above can be
left alone
for each of those cookies, if the cookie has the "selected
box" set in its box mask, clear the bit and reduce the box count
by one. (again since the box count is held in high order
bits this count can be reduced by just subtracting a scaled one
value.)
recurse to continue searching at the next level
If we run out of boxes to check, just return; continue trying
alternatives at the next level up, if there is one.
I think that's it. I hope it makes sense.
I need clarification for the last step.
Here is a mixture of C code and pseudocode. A SolveState is an
array, one element for each cookie type.
Answer
solve( SolveState *problem ){
Answer answer;
jmp_buf found;
if( seek( &answer, found, problem ) ) return answer;
printf( "No joy\n" );
exit(0);
}
/* Note that seek() returns true if and only if a longjmp() was done.
*/
_Bool
seek( Answer *answer, jmp_buf found, SolveState *problem ){
if( setjmp( found ) ) return true;
return look( answer, found, 0, problem );
}
/* The call to look() after the if() starts the search going. */
/* If an answer is found, a longjmp() will go back to the 'return
true;' */
_Bool
look( Answer *answer, jmp_buf found, Index level, SolveState *in ){
SolveState work;
...
if( level == COOKIE_LIMIT ) longjmp( found, 1 );
... ignoring the "optimization" step of choosing the best cookie
...
for each box B in in->cookies[level] {
remember box B for the cookie in->cookies[level], in *answer
copy in->cookies[ level+1 .. COOKIE_LEVEL ) to work
remove B from each work.cookies[ level+1 .. COOKIE_LEVEL )
look( out, found, level+1, &work );
}
return false;
}
/* Note that 'true' is never returned by this function. */
/* If there are more boxes to try for the current cookie, recurse. */
/* When there are no more boxes to try, return false. */
/* If/when all of the cookies have been assigned, longjmp() */
/* Doing a longjmp() will cause seek() to return true. */
The notation ...cookies[ level+1 .. COOKIE_LEVEL ) is an implied
for() loop.
Now for your questions...
What exactly do we do after our fist guess failed?
If you mean the first box for the current cookie, go on to the
next box for that cookie.
If you mean all the boxes for the current cookie failed, simply
return.
If you mean all the boxes for the cookie on the first level, simply
return, which will cause an outer call to fail irrevocably. Under
the conditions of the problem, this never happens, if the code has
been written correctly.
Supposedly continue to the next sort of cookies that has the same #
of boxes as the one we just tried.
But what we do after that? Do we have to try cookies with higher
number of boxes?
No. At each level there is exactly one cookie to try, and we try
each of the boxes that cookie might be taken from (at this point in
the search); if none of those boxes work, backtrack -- which is to
say, return to the next outer level.
If yes, does it apply in case when cookies that we tried before had
just one box?
At each level, we are never concerned with the cookie choices that
were made at previous levels (which means a smaller level index)
as those choices are "frozen" for this part of the search.
I hope these answers clear up what you're looking for.
After that, I talked to a friend to try a different approach to
producing a solution. After some light editing by myself -- mostly
just formatting and some name changes -- the code below was put into
the hopper. (Note: I claim no credit for code below. I did some
editing to make it more readable but beyond that none of it is a
result of my efforts.)
#include <stdint.h>
#include "solver.h"
static uint32_t adjacent[ N_BOXES ];
static int cookie_of_box[ N_BOXES ];
static int box_of_cookie[ N_BOXES ];
static uint32_t seen;
static int
search( int b ){
uint32_t m = adjacent[b];
while( m ){
uint32_t bit = m & -m;
m ^= bit;
int c = __builtin_ctz( bit );
if( seen & 1u<<c ) continue;
seen |= 1u<<c;
if( box_of_cookie[c] == -1 || search( box_of_cookie[c]
) ){ box_of_cookie[c] = b;
cookie_of_box[b] = c;
return 1;
}
}
return 0;
}
peek_t
solver( boxes_t boxes ){
peek_t res;
for( int i = 0; i < N_BOXES; i++ ){
uint32_t m = 0;
for( int k = 0; k < BOX_SIZE; k++ ){
m |= 1u << boxes.b[i][k];
}
adjacent[i] = m;
cookie_of_box[i] = -1;
box_of_cookie[i] = -1;
}
for( int i = 0; i < N_BOXES; i++ ){
seen = 0;
search( i );
}
for( int i = 0; i < N_BOXES; i++ ){
int c = cookie_of_box[i];
for( int k = 0; k < BOX_SIZE; k++ ){
if( boxes.b[i][k] == c ){
res.b[i] = k;
break;
}
}
}
return res;
}
Despite being derived independently, this code uses the same sort of
approach that I had used, except my code was recursive rather than
iterative.
On Wed, 08 Apr 2026 08:42:11 -0700
Tim Rentsch <[email protected]> wrote:
After that, I talked to a friend to try a different approach to
producing a solution. After some light editing by myself -- mostly
just formatting and some name changes -- the code below was put into
the hopper. (Note: I claim no credit for code below. I did some
editing to make it more readable but beyond that none of it is a
result of my efforts.)
#include <stdint.h>
#include "solver.h"
static uint32_t adjacent[ N_BOXES ];
static int cookie_of_box[ N_BOXES ];
static int box_of_cookie[ N_BOXES ];
static uint32_t seen;
static int
search( int b ){
uint32_t m = adjacent[b];
while( m ){
uint32_t bit = m & -m;
m ^= bit;
int c = __builtin_ctz( bit );
if( seen & 1u<<c ) continue;
seen |= 1u<<c;
if( box_of_cookie[c] == -1 || search( box_of_cookie[c]
) ){ box_of_cookie[c] = b;
cookie_of_box[b] = c;
return 1;
}
}
return 0;
}
peek_t
solver( boxes_t boxes ){
peek_t res;
for( int i = 0; i < N_BOXES; i++ ){
uint32_t m = 0;
for( int k = 0; k < BOX_SIZE; k++ ){
m |= 1u << boxes.b[i][k];
}
adjacent[i] = m;
cookie_of_box[i] = -1;
box_of_cookie[i] = -1;
}
for( int i = 0; i < N_BOXES; i++ ){
seen = 0;
search( i );
}
for( int i = 0; i < N_BOXES; i++ ){
int c = cookie_of_box[i];
for( int k = 0; k < BOX_SIZE; k++ ){
if( boxes.b[i][k] == c ){
res.b[i] = k;
break;
}
}
}
return res;
}
Despite being derived independently, this code uses the same sort of
approach that I had used, except my code was recursive rather than
iterative.
I finally looked closer to the code of your friend.
To me, it does not look at all similar to description of "your"
algorithm in your other post.
It appears more similar to my 2nd algorithm, but sort of orthogonal to
it - my outermost loop iterate through sorts of cookies; in the code of
your friend outermost loop iterate through boxes.
Another difference, less important for robustness, more important for measured speed, esp. on avearge, is when search decides to revert
previous selection. I am doing it only after all possibilities to
select at the top level failed. Your friend makes no such effort and
easily dives deep. In principle, both approaches are equivalent, but
in physical world recursive call is significantly slower than
iteration of simple loop and excessive updates of state variables are
also slower then lookups without updates.
And, of course, apart from difference in algorithm there is a
significant difference in implementation technique. Your friend took advantage of the fact that the challenge was specified for relatively
small number of boxes and intensely used bit masks. I [didn't took
similar advantage [except for using byte variables instead of short
or int] and used arrays. Of course, the technique applied by your
friend is faster. His solution ended up ~2 times slower then mine
not because of technique, but because of algorithmic difference
mentioned in previous paragraph.
Today I modified his code, applying variant of the same algorithmic
change. At the same opportunity I removed use of static data and
increased size of bit masks to 64 bits. As expected, resulting code
was very fast, ~3 times faster than my own. [...code...]
On Sat, 11 Apr 2026 15:57:23 -0700
Tim Rentsch <[email protected]> wrote:
Just now I paid attention that when posting last night I made a
mistake in cut&past. It result in the post that contained the
source code but missed text that explains what it's all about.
So, second try.
Could the code below be considered an implementation of your
algorithm?
It uses different programming technique:
- iteration, goto and explicit stack instead of recursion
- loop instead of longjmp
- popcount instead of maintaining 'count of boxes' field
- free_sorts bit mask instead of list of sorts of cookies
indexed by levels (later on I realized that this modification
was unnecessary)
I coded it in such manner because as the next step I am planning
to add instrumentation that would be easier in iterative code than
in the recursive one.
It seems to me that resulting search order is either exactly or at
least approximately the same as in your description.
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <limits.h>
#include "solver.h"
peek_t solver(boxes_t boxes)
{
uint64_t boxes_of_sorts[N_BOXES] = {0};
uint64_t sorts_of_boxes[N_BOXES];
uint8_t b_idx[N_BOXES][N_BOXES];
// build indices
for (int bi = 0; bi < N_BOXES; ++bi) {
uint64_t sorts = 0;
for (int ci = 0; ci < BOX_SIZE; ++ci) {
uint8_t sort = boxes.b[bi][ci];
sorts |= (uint64_t)1 << sort;
b_idx[bi][sort] = ci;
boxes_of_sorts[sort] |= (uint64_t)1 << bi;
}
sorts_of_boxes[bi] = sorts;
}
peek_t result={0};
uint64_t free_boxes = (uint64_t)-1 >> (64-N_BOXES);
uint64_t free_sorts = (uint64_t)-1 >> (64-N_BOXES);
struct stack_node {
uint64_t free_boxes, free_sorts, boxes;
int sel_sort;
} stack[N_BOXES];
int level = 0;
do {
// find free sort that remains in the smallest # of free boxes
uint64_t msk = free_sorts;
int nb_min = INT_MAX;
int sel_sort = N_BOXES;
while (msk) {
int sort = __builtin_ctzll(msk);
uint64_t boxes = boxes_of_sorts[sort] & free_boxes;
if (boxes) {
int nb = __builtin_popcountll(boxes);
if (nb_min > nb) {
nb_min = nb;
sel_sort = sort;
if (nb == 1)
break;
}
}
msk ^= (uint64_t)1 << sort;
}
if (sel_sort == N_BOXES)
goto pop;
// candidate sort found
uint64_t boxes = boxes_of_sorts[sel_sort] & free_boxes;
back:;
int bi = __builtin_ctzll(boxes);
result.b[bi] = b_idx[bi][sel_sort];
boxes ^= (uint64_t)1 << bi;
// preserve state
stack[level] = (struct stack_node){
.free_boxes=free_boxes, .free_sorts=free_sorts,
.boxes=boxes, .sel_sort = sel_sort};
// go to the next level
free_boxes ^= (uint64_t)1 << bi;
free_sorts ^= (uint64_t)1 << sel_sort;
++level;
continue;
// return to previous level
pop:
while (level > 0) {
--level;
boxes = stack[level].boxes;
if (boxes) {
free_boxes = stack[level].free_boxes;
free_sorts = stack[level].free_sorts;
sel_sort = stack[level].sel_sort;
goto back;
}
}
// should never come here
break;
} while (level < N_BOXES);
return result;
}
Michael S <[email protected]> writes:
On Sat, 11 Apr 2026 15:57:23 -0700
Tim Rentsch <[email protected]> wrote:
[..some earlier back and forth..]
Just now I paid attention that when posting last night I made a
mistake in cut&past. It result in the post that contained the
source code but missed text that explains what it's all about.
So, second try.
Could the code below be considered an implementation of your
algorithm?
I'm not sure. I think it's close, but I'm having trouble being
more definite than that.
It uses different programming technique:
- iteration, goto and explicit stack instead of recursion
I got that.
- loop instead of longjmp
I got that too.
- popcount instead of maintaining 'count of boxes' field
I see that. Interesting choice.
- free_sorts bit mask instead of list of sorts of cookies
indexed by levels (later on I realized that this modification
was unnecessary)
I see that you did this but I was confused about how it worked.
I coded it in such manner because as the next step I am planning
to add instrumentation that would be easier in iterative code than
in the recursive one.
Yes, and surely there are other reasons to want an iterative version
rather than a recursive one.
It seems to me that resulting search order is either exactly or at
least approximately the same as in your description.
Exactly or at least approximately -- I'll go for that. :)
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <limits.h>
#include "solver.h"
peek_t solver(boxes_t boxes)
{
uint64_t boxes_of_sorts[N_BOXES] = {0};
uint64_t sorts_of_boxes[N_BOXES];
uint8_t b_idx[N_BOXES][N_BOXES];
// build indices
for (int bi = 0; bi < N_BOXES; ++bi) {
uint64_t sorts = 0;
for (int ci = 0; ci < BOX_SIZE; ++ci) {
uint8_t sort = boxes.b[bi][ci];
sorts |= (uint64_t)1 << sort;
b_idx[bi][sort] = ci;
boxes_of_sorts[sort] |= (uint64_t)1 << bi;
}
sorts_of_boxes[bi] = sorts;
}
peek_t result={0};
uint64_t free_boxes = (uint64_t)-1 >> (64-N_BOXES);
uint64_t free_sorts = (uint64_t)-1 >> (64-N_BOXES);
struct stack_node {
uint64_t free_boxes, free_sorts, boxes;
int sel_sort;
} stack[N_BOXES];
int level = 0;
do {
// find free sort that remains in the smallest # of free boxes
uint64_t msk = free_sorts;
int nb_min = INT_MAX;
int sel_sort = N_BOXES;
while (msk) {
int sort = __builtin_ctzll(msk);
uint64_t boxes = boxes_of_sorts[sort] & free_boxes;
if (boxes) {
int nb = __builtin_popcountll(boxes);
if (nb_min > nb) {
nb_min = nb;
sel_sort = sort;
if (nb == 1)
break;
}
}
msk ^= (uint64_t)1 << sort;
}
if (sel_sort == N_BOXES)
goto pop;
// candidate sort found
uint64_t boxes = boxes_of_sorts[sel_sort] & free_boxes;
back:;
int bi = __builtin_ctzll(boxes);
result.b[bi] = b_idx[bi][sel_sort];
boxes ^= (uint64_t)1 << bi;
// preserve state
stack[level] = (struct stack_node){
.free_boxes=free_boxes, .free_sorts=free_sorts,
.boxes=boxes, .sel_sort = sel_sort};
// go to the next level
free_boxes ^= (uint64_t)1 << bi;
free_sorts ^= (uint64_t)1 << sel_sort;
++level;
continue;
// return to previous level
pop:
while (level > 0) {
--level;
boxes = stack[level].boxes;
if (boxes) {
free_boxes = stack[level].free_boxes;
free_sorts = stack[level].free_sorts;
sel_sort = stack[level].sel_sort;
goto back;
}
}
// should never come here
break;
} while (level < N_BOXES);
return result;
}
I'm leaving the code in even though I don't have a lot to say
about it. Trying to understand it I kept getting an internal
"complexity threshold exceeded" exception. Finally I decided I
would just try to rewrite it, preserving the spirit although not
all of the details. My code ended up being more lines of source
than yours although the core routine is shorter. There are three
parts that I broke out into separate functions (not shown). Those
parts are, one, a function 'bsets_from_boxes()' to turn a boxes_t
into a collection of bitsets holding the boxes corresponding to
each cookie type; two, a function 'best_next_ctype()' that
chooses which cookie type to consider next; and three, a final
function 'answer()' to turn the 'box_chosen[]' bitset array into
the representation needed by the interface. Here is the central
routine:
peek_t
solver( boxes_t box_contents ){
Bset boxes_free[ COOKIE_LIMIT + 1 ];
Ctype ctypes[ COOKIE_LIMIT ];
Bset boxes_to_try[ COOKIE_LIMIT ];
Bset box_chosen[ COOKIE_LIMIT ];
Bsets bsets = bsets_from_boxes( &box_contents );
Index depth = 0;
Bset free = boxes_free[0] = ~((Bset){-1} <<
COOKIE_LIMIT-1 << 1);
for( Index i = 0; i < LIMIT_OF( ctypes ); i++ ) ctypes[i] = i;
do {
Ctype ctype = best_next_ctype( depth, free, &bsets, ctypes );
Bset chosen;
boxes_to_try[depth] = bsets.bset_of_ctype[ ctype ] & free;
while(
chosen = boxes_to_try[depth],
boxes_to_try[depth] ^= chosen ^= chosen & chosen-1,
!chosen
){
assert( depth > 0 ), --depth;
free = boxes_free[ depth ];
ctype = ctypes[ depth ];
}
box_chosen[ ctype ] = chosen;
assert( free & chosen );
boxes_free[ depth+1 ] = free = free ^ chosen;
} while( ++depth < COOKIE_LIMIT );
return answer( box_chosen, ctypes, &box_contents );
}
Two further comments.
One, by keeping the box_chosen[] array as bit masks, the use of
the __builtin bit function gets deferred and so is done only once
for each output slot, at the end (in the 'answer()' function).
Two, the function 'best_next_ctype()' manipulates the ctypes[]
array as well as returning the cookie type at the appropriate
depth in the array. I tried different policies for when to
select the "best" cookie type, including "always", "never", and
"only for the first N levels", with several choices of N.
Different policies produced different timing values but there
wasn't any obvious relation between them. Something that
occurred to me only later is a policy "choose the best out of the
next K cookie types", for different values of K.
The main thing is I hope the above routine does a better job of
conveying my thoughts and understandings.
Michael S <[email protected]> writes:
On Wed, 08 Apr 2026 08:42:11 -0700
Tim Rentsch <[email protected]> wrote:
After that, I talked to a friend to try a different approach to
producing a solution. After some light editing by myself -- mostly
just formatting and some name changes -- the code below was put
into the hopper. (Note: I claim no credit for code below. I did
some editing to make it more readable but beyond that none of it
is a result of my efforts.)
#include <stdint.h>
#include "solver.h"
static uint32_t adjacent[ N_BOXES ];
static int cookie_of_box[ N_BOXES ];
static int box_of_cookie[ N_BOXES ];
static uint32_t seen;
static int
search( int b ){
uint32_t m = adjacent[b];
while( m ){
uint32_t bit = m & -m;
m ^= bit;
int c = __builtin_ctz( bit );
if( seen & 1u<<c ) continue;
seen |= 1u<<c;
if( box_of_cookie[c] == -1 || search(
box_of_cookie[c] ) ){ box_of_cookie[c] = b;
cookie_of_box[b] = c;
return 1;
}
}
return 0;
}
peek_t
solver( boxes_t boxes ){
peek_t res;
for( int i = 0; i < N_BOXES; i++ ){
uint32_t m = 0;
for( int k = 0; k < BOX_SIZE; k++ ){
m |= 1u << boxes.b[i][k];
}
adjacent[i] = m;
cookie_of_box[i] = -1;
box_of_cookie[i] = -1;
}
for( int i = 0; i < N_BOXES; i++ ){
seen = 0;
search( i );
}
for( int i = 0; i < N_BOXES; i++ ){
int c = cookie_of_box[i];
for( int k = 0; k < BOX_SIZE; k++ ){
if( boxes.b[i][k] == c ){
res.b[i] = k;
break;
}
}
}
return res;
}
Despite being derived independently, this code uses the same sort
of approach that I had used, except my code was recursive rather
than iterative.
I finally looked closer to the code of your friend.
To me, it does not look at all similar to description of "your"
algorithm in your other post.
It appears more similar to my 2nd algorithm, but sort of orthogonal
to it - my outermost loop iterate through sorts of cookies; in the
code of your friend outermost loop iterate through boxes.
I confess I didn't look closely at the algorithm but relied
on comments about how the code worked. I tried to express
this in my earlier posting when I said "the same sort of
approach" rather than "the same approach". Sorry for any
confusion.
Another difference, less important for robustness, more important
for measured speed, esp. on avearge, is when search decides to
revert previous selection. I am doing it only after all
possibilities to select at the top level failed. Your friend makes
no such effort and easily dives deep. In principle, both
approaches are equivalent, but in physical world recursive call is significantly slower than iteration of simple loop and excessive
updates of state variables are also slower then lookups without
updates.
And, of course, apart from difference in algorithm there is a
significant difference in implementation technique. Your friend
took advantage of the fact that the challenge was specified for
relatively small number of boxes and intensely used bit masks. I
[didn't took similar advantage [except for using byte variables
instead of short or int] and used arrays. Of course, the technique
applied by your friend is faster. His solution ended up ~2 times
slower then mine not because of technique, but because of
algorithmic difference mentioned in previous paragraph.
Today I modified his code, applying variant of the same algorithmic
change. At the same opportunity I removed use of static data and
increased size of bit masks to 64 bits. As expected, resulting code
was very fast, ~3 times faster than my own. [...code...]
I'm happy to hear the posting provided an opportunity to more deeply
explore the problem. Also it's interesting to learn the results of
your investigations; thank you for those.
On Thu, 16 Apr 2026 23:52:14 -0700
Tim Rentsch <[email protected]> wrote:
I'd guess that every regular c.l.c reader knows that you like guessing
games. Even if I don't think that it is appropriate here, up to the
certain limit, I am willing to participate.
It's pretty easy to guess that the code above preceded by:
[...]
However I both can not and don't want to guess the content of your
cores routine best_next_ctype(). And without it I don't have much to
say about you solution.
The main thing is I hope the above routine does a better job of
conveying my thoughts and understandings.
It looks like unlike your friend you missed the key - equity of number
of boxes and number of sorts of cookies is not accidental!
It's the
most important thing in the whole exercise. It's what make solution
feasible for much bigger values of N_BOXES, probably up to several K
in less than 1 sec.
But, then again, without source code of best_next_ctype() I can not be
sure about it.
Michael S <[email protected]> writes:
On Thu, 16 Apr 2026 23:52:14 -0700
Tim Rentsch <[email protected]> wrote:
[...]
I'd guess that every regular c.l.c reader knows that you like
guessing games. Even if I don't think that it is appropriate here,
up to the certain limit, I am willing to participate.
I posted what I did because I wanted the focus to be on reading the
code and not on running the code. I was hoping that understanding
would be feasible without needing any guessing.
It's pretty easy to guess that the code above preceded by:
[...]
Looks reasonable.
However I both can not and don't want to guess the content of your
cores routine best_next_ctype(). And without it I don't have much
to say about you solution.
Here is one implementation of best_next_ctype():
Ctype
best_next_ctype( Index depth, Bset free, Bsets *bsets, Ctype ctypes[]
){ // permute?
return ctypes[depth];
}
At the "permute?" comment an implementation could perform any
permutation of the elements of the ctypes[] array, within the range
of ctypes[depth] and ctypes[COOKIE_LIMIT-1], and the result should
still be a working solution. Or just leave it as a comment. The
correctness of the code doesn't depend on what permutation is done
or how the choice is made.
The main thing is I hope the above routine does a better job of
conveying my thoughts and understandings.
It looks like unlike your friend you missed the key - equity of
number of boxes and number of sorts of cookies is not accidental!
The way the code is written it allows the value of COOKIE_LIMIT to
be chosen independently of the value of N_BOXES (or at least I hope
it does). I didn't explore any such cases but it might be
interesting to do that.
It's the
most important thing in the whole exercise. It's what make solution feasible for much bigger values of N_BOXES, probably up to several K
in less than 1 sec.
Of course for the code posted the value of N_BOXES must be no more
than the number of bits in the bitset type Bset (which is meant to
be short for "Box set", or a set of box values).
But, then again, without source code of best_next_ctype() I can not
be sure about it.
My questions are these. One, could you understand what the code is
doing and how it works? And two, if given an appropriate choice of
what permutation is done in best_next_ctype(), does this code do the
same thing as your code? I was trying to match the behavior of your
code but I wasn't able to figure out how it decided where to go
next.
Is this a correct way to randomize the boxes so that there's at least
one cookie of each kind per box ?
#include <iostream>
#include <array>
#include <random>
#include <cstdint>
#include <algorithm>
#include <span>
using namespace std;
int main()
{
using box = array<uint8_t, 10>;
array<box, 20> boxes;
for( size_t bx = 0; bx < boxes.size(); ++bx )
for( uint8_t &cookie : boxes[bx] )
cookie = (uint8_t)bx;
mt19937_64 mt;
uniform_int_distribution<size_t>
rndBox( 0, 19 ),
rndCookie( 1, 9 );
for( box &bx : boxes )
{
for( uint8_t &ck : span( bx.begin() + 1, bx.end() ) )
swap( ck, boxes[rndBox( mt )][rndCookie( mt )] );
swap( bx[0], boxes[rndBox( mt )][0] );
}
for( box &bx : boxes )
sort( bx.begin(), bx.end() );
}
On 17/04/2026 16:58, Bonita Montero wrote:
Is this a correct way to randomize the boxes so that there's at least
one cookie of each kind per box ?
That's not possible. There are 20 types of cookie, but you can only have
10 per box. Those might all be of the same kind.
Is this a correct way to randomize the boxes so that there's at least
one cookie of each kind per box ?
Bonita Montero <[email protected]> writes:
Is this a correct way to randomize the boxes so that there's at least
one cookie of each kind per box ?
No, because this is comp.lang.c, not comp.lang.c++.
Am 17.04.2026 um 21:58 schrieb Scott Lurndal:
Bonita Montero <[email protected]> writes:
Is this a correct way to randomize the boxes so that there's at least
one cookie of each kind per box ?
No, because this is comp.lang.c, not comp.lang.c++.
"The solution does not have to be in C, ..."
On 18/04/2026 04:52, Bonita Montero wrote:
Am 17.04.2026 um 21:58 schrieb Scott Lurndal:
Bonita Montero <[email protected]> writes:
Is this a correct way to randomize the boxes so that there's at least
one cookie of each kind per box ?
No, because this is comp.lang.c, not comp.lang.c++.
"The solution does not have to be in C, ..."
OK, here's one in my scripting language:
If the string data is excluded, my version is 245 characters, compared
with 1589 for your C++. Compilation time is 0.0 seconds.
Am 17.04.2026 um 21:58 schrieb Scott Lurndal:
Bonita Montero <[email protected]> writes:
Is this a correct way to randomize the boxes so that there's at
least one cookie of each kind per box ?
No, because this is comp.lang.c, not comp.lang.c++.
"The solution does not have to be in C, ..."
My questions are these. One, could you understand what the code is
doing and how it works? And two, if given an appropriate choice of
what permutation is done in best_next_ctype(), does this code do the
same thing as your code? I was trying to match the behavior of your
code but I wasn't able to figure out how it decided where to go
next.
On Fri, 17 Apr 2026 07:40:13 -0700
Tim Rentsch <[email protected]> wrote:
My questions are these. One, could you understand what the code is
doing and how it works? And two, if given an appropriate choice of
what permutation is done in best_next_ctype(), does this code do the
same thing as your code? I was trying to match the behavior of your
code but I wasn't able to figure out how it decided where to go
next.
Here is the simplest form of the algorithm that I can come with.
Essentially, it is a serial non-recursive variant of the code of your
friend with no algorithmic or technical enhancements.
Even in this form it is not slow and hopefully makes the key idea more obvious.
#include <stdint.h>
#include <stdbool.h>
#include "solver.h"
peek_t
solver( boxes_t boxes ){
uint8_t box_of_cookie[N_BOXES]; // valid for selected sorts
for (int i = 0; i < N_BOXES; i++)
box_of_cookie[i] = N_BOXES; // value N_BOXES marks unselected sort
// Selection proceeds in progressive steps.
// At each step we select one new box and one new sort.
// A box and a sort selected at step i are never unselected later
// although association between selected boxes and sorts
// can be changed
for (int step = 0; step < N_BOXES; ++step) {
typedef struct {
uint8_t box, sort;
} trace_t;
trace_t levels[N_BOXES];
bool seen[N_BOXES] = {0};
for (int lvl = 0, box = step; ;) {
// look for unseen sort in box
int sort;
for (int ci = 0; ci < BOX_SIZE; ++ci) {
sort = boxes.b[box][ci];
if (!seen[sort])
break;
}
if (seen[sort]) { // no unseen sorts in box
box = levels[--lvl].box; // return to previous level
continue;
}
// Unseen sort found
// It will become a new selection for box
int selected_box = box_of_cookie[sort];
if (selected_box == N_BOXES) {
// sort unselected.
// it means that search at step i succeed
// commit preserved assignments
for (int k = 0; k < lvl; ++k)
box_of_cookie[levels[k].sort] = levels[k].box;
// assign new sort
box_of_cookie[sort] = box;
break;
}
// sort already selected
// Save new assignment for sort.
// Saved box also serves us if we return
// to the current level from above
levels[lvl++] = (trace_t){ .sort=sort, .box=box};
// Mark sort as seen.
// That mark guarantees forward progress of the search
seen[sort] = true;
// Continue search.
// At the next level we will look for new selection
// for old box of sort
box = selected_box;
}
}
// translate result from box_of_cookie[] to index of cookie in box
peek_t res;
for (int c = 0; c < N_BOXES; ++c) {
int b = box_of_cookie[c];
for (int k = 0; k < BOX_SIZE; ++k) {
if (boxes.b[b][k] == c) {
res.b[b] = k;
break;
}
}
}
return res;
}
On Fri, 17 Apr 2026 07:40:13 -0700
Tim Rentsch <[email protected]> wrote:
My questions are these. One, could you understand what the code is
doing and how it works? And two, if given an appropriate choice of
what permutation is done in best_next_ctype(), does this code do the
same thing as your code? I was trying to match the behavior of your
code but I wasn't able to figure out how it decided where to go
next.
Here is the simplest form of the algorithm that I can come with.
Essentially, it is a serial non-recursive variant of the code of your
friend with no algorithmic or technical enhancements.
Even in this form it is not slow and hopefully makes the key idea more obvious.
#include <stdint.h>
#include <stdbool.h>
#include "solver.h"
peek_t
solver( boxes_t boxes ){
uint8_t box_of_cookie[N_BOXES]; // valid for selected sorts
for (int i = 0; i < N_BOXES; i++)
box_of_cookie[i] = N_BOXES; // value N_BOXES marks unselected sort
// Selection proceeds in progressive steps.
// At each step we select one new box and one new sort.
// A box and a sort selected at step i are never unselected later
// although association between selected boxes and sorts
// can be changed
for (int step = 0; step < N_BOXES; ++step) {
typedef struct {
uint8_t box, sort;
} trace_t;
trace_t levels[N_BOXES];
bool seen[N_BOXES] = {0};
for (int lvl = 0, box = step; ;) {
// look for unseen sort in box
int sort;
for (int ci = 0; ci < BOX_SIZE; ++ci) {
sort = boxes.b[box][ci];
if (!seen[sort])
break;
}
if (seen[sort]) { // no unseen sorts in box
box = levels[--lvl].box; // return to previous level
continue;
}
// Unseen sort found
// It will become a new selection for box
int selected_box = box_of_cookie[sort];
if (selected_box == N_BOXES) {
// sort unselected.
// it means that search at step i succeed
// commit preserved assignments
for (int k = 0; k < lvl; ++k)
box_of_cookie[levels[k].sort] = levels[k].box;
// assign new sort
box_of_cookie[sort] = box;
break;
}
// sort already selected
// Save new assignment for sort.
// Saved box also serves us if we return
// to the current level from above
levels[lvl++] = (trace_t){ .sort=sort, .box=box};
// Mark sort as seen.
// That mark guarantees forward progress of the search
seen[sort] = true;
// Continue search.
// At the next level we will look for new selection
// for old box of sort
box = selected_box;
}
}
// translate result from box_of_cookie[] to index of cookie in box
peek_t res;
for (int c = 0; c < N_BOXES; ++c) {
int b = box_of_cookie[c];
for (int k = 0; k < BOX_SIZE; ++k) {
if (boxes.b[b][k] == c) {
res.b[b] = k;
break;
}
}
}
return res;
}
Michael S <[email protected]> writes:
On Fri, 17 Apr 2026 07:40:13 -0700
Tim Rentsch <[email protected]> wrote:
My questions are these. One, could you understand what the code is
doing and how it works? And two, if given an appropriate choice of
what permutation is done in best_next_ctype(), does this code do
the same thing as your code? I was trying to match the behavior
of your code but I wasn't able to figure out how it decided where
to go next.
Here is the simplest form of the algorithm that I can come with. Essentially, it is a serial non-recursive variant of the code of
your friend with no algorithmic or technical enhancements.
Even in this form it is not slow and hopefully makes the key idea
more obvious.
I have difficulty reading code written in the style used below. I
might describe it broadly as too many trees and not enough forest.
That comment is not meant as criticism but given just for
informational value.
My hope has been that my explanations, given first in prose and then
in code, would let you understand how I approached the problem, so
you could see how your approach differs from mine and then explain
what the differences are. As things stand I don't know what are the
common points of reference, or even whether there are any important
ones.
Having said that, let me ask this. What is your goal? I find
myself wondering if we are talking past each other.
#include <stdint.h>
#include <stdbool.h>
#include "solver.h"
peek_t
solver( boxes_t boxes ){
uint8_t box_of_cookie[N_BOXES]; // valid for selected sorts
for (int i = 0; i < N_BOXES; i++)
box_of_cookie[i] = N_BOXES; // value N_BOXES marks unselected
sort
// Selection proceeds in progressive steps.
// At each step we select one new box and one new sort.
// A box and a sort selected at step i are never unselected later
// although association between selected boxes and sorts
// can be changed
for (int step = 0; step < N_BOXES; ++step) {
typedef struct {
uint8_t box, sort;
} trace_t;
trace_t levels[N_BOXES];
bool seen[N_BOXES] = {0};
for (int lvl = 0, box = step; ;) {
// look for unseen sort in box
int sort;
for (int ci = 0; ci < BOX_SIZE; ++ci) {
sort = boxes.b[box][ci];
if (!seen[sort])
break;
}
if (seen[sort]) { // no unseen sorts in box
box = levels[--lvl].box; // return to previous level
continue;
}
// Unseen sort found
// It will become a new selection for box
int selected_box = box_of_cookie[sort];
if (selected_box == N_BOXES) {
// sort unselected.
// it means that search at step i succeed
// commit preserved assignments
for (int k = 0; k < lvl; ++k)
box_of_cookie[levels[k].sort] = levels[k].box;
// assign new sort
box_of_cookie[sort] = box;
break;
}
// sort already selected
// Save new assignment for sort.
// Saved box also serves us if we return
// to the current level from above
levels[lvl++] = (trace_t){ .sort=sort, .box=box};
// Mark sort as seen.
// That mark guarantees forward progress of the search
seen[sort] = true;
// Continue search.
// At the next level we will look for new selection
// for old box of sort
box = selected_box;
}
}
// translate result from box_of_cookie[] to index of cookie in box
peek_t res;
for (int c = 0; c < N_BOXES; ++c) {
int b = box_of_cookie[c];
for (int k = 0; k < BOX_SIZE; ++k) {
if (boxes.b[b][k] == c) {
res.b[b] = k;
break;
}
}
}
return res;
}
Another question: is the above typical of how you usually write
code, or are you writing it like that for my benefit?
| Sysop: | DaiTengu |
|---|---|
| Location: | Appleton, WI |
| Users: | 1,114 |
| Nodes: | 10 (0 / 10) |
| Uptime: | 492515:55:16 |
| Calls: | 14,267 |
| Calls today: | 3 |
| Files: | 186,321 |
| D/L today: |
27,599 files (9,006M bytes) |
| Messages: | 2,518,520 |