from numpy.random import randint, seed

# Count how many of 5000 seeded random distributions have the same total
# distance below the mean as above the mean.
equal_distances = 0
for i in range(5000):
    seed(i)  # reseed on every iteration so each distribution is reproducible
    distribution = randint(0, 1000, 10)  # 10 random integers in [0, 1000)
    dist_mean = sum(distribution) / len(distribution)
    low_sum = 0
    up_sum = 0
    for value in distribution:
        if value == dist_mean:
            continue
        if value < dist_mean:
            low_sum += round((dist_mean - value), 1)
        elif value > dist_mean:
            up_sum += round((value - dist_mean), 1)
    # NOTE: this is an exact == between two float sums. Rounding each term
    # does not stop floating-point error from accumulating while summing, so
    # totals that are mathematically equal can still compare unequal here.
    if (low_sum == up_sum):
        equal_distances += 1

What I expected to happen:

equal_distances should be 5000
What actually happened:

equal_distances = 4021

I need to measure the total distance below the mean and the total distance above the mean for 5000 different distributions to check whether they are equal. For some reason I'm only getting 4021 equal results instead of 5000…

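Try calling round() only after you have finished summing the distances. Rounding each difference before adding it does not prevent small floating-point errors from accumulating in the two sums, so the exact == comparison can still fail. Try it like this: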

from numpy.random import randint, seed

# Count how many of 5000 seeded random distributions have the same total
# distance below the mean as above the mean, comparing the totals only after
# rounding them.
equal_distances = 0
for i in range(5000):
    seed(i)  # reseed on every iteration so each distribution is reproducible
    distribution = randint(0, 1000, 10)  # 10 random integers in [0, 1000)
    dist_mean = sum(distribution) / len(distribution)
    low_sum = 0
    up_sum = 0
    for value in distribution:
        # Values equal to the mean match neither branch and add no distance.
        if value < dist_mean:
            low_sum += dist_mean - value
        elif value > dist_mean:
            up_sum += value - dist_mean
    # The mean of 10 integers has at most one decimal place, so each true
    # total is a multiple of 0.1; rounding both totals to one decimal discards
    # the accumulated floating-point error before the comparison.
    if round(low_sum, 1) == round(up_sum, 1):
        equal_distances += 1

Very helpful post;
My code was different but the error was identical: only 4021 of the 5000 distributions came out equal, and the solution was the same!
My error checking had led me to believe that a rounding error was occurring somewhere, but I was having difficulty pinpointing it.
Thanks!

my code:

def give_mean_distance(lst):
    """Return whether the distances below and above the mean sum to the same total.

    The mean of ``lst`` is computed without rounding. The distance of every
    value below the mean and of every value above the mean is summed
    separately, and the two totals are compared with a small tolerance so
    that floating-point round-off does not produce a false mismatch.

    Args:
        lst: A sequence of numbers.

    Returns:
        True if the two totals agree within tolerance, otherwise False.
        An empty sequence returns True, since both totals are zero.
    """
    from math import fsum, isclose

    # len() rather than truthiness so array-like inputs are accepted too.
    if len(lst) == 0:
        return True

    # Keep the mean unrounded: rounding it first shifts every distance and
    # breaks the comparison whenever the mean is not a multiple of 0.1
    # (for example [0, 0, 1], whose mean is 1/3).
    avg = sum(lst) / len(lst)

    # fsum keeps the round-off error of each total as small as possible.
    below_total = fsum(avg - value for value in lst if value < avg)
    above_total = fsum(value - avg for value in lst if value > avg)

    # abs_tol covers the case where both totals are (near) zero.
    return isclose(below_total, above_total, rel_tol=1e-9, abs_tol=1e-9)
# Distributions whose below/above totals did not compare equal, kept so they
# can be inspected afterwards.
no_equal = []
equal_distances = 0
for i in range(5000):
    seed(i)  # reseed on every iteration so each distribution is reproducible
    mydist = list(randint(0, 1000, 10))  # 10 random integers in [0, 1000)
    if give_mean_distance(mydist):
        equal_distances += 1
    else:
        no_equal.append(mydist)
# Bare expression: echoes the count in an interactive session or notebook
# cell; it has no effect when run as a plain script.
equal_distances