In reference to the Guided Project: Profitable App Profiles for the App Store and Google Play Markets
In case anyone is interested in using frequencies to find duplicate apps:
#checking for any duplicates using frequencies
def find_frequencies(dataset):
    """Count how many rows share each value found in the first column.

    Args:
        dataset: An iterable of rows, where each row is an indexable
            sequence whose element 0 is hashable (here, presumably the
            app name -- confirm against the data set's column layout).

    Returns:
        A dict mapping each distinct first-column value to the number of
        rows it appears in. Keys are in first-seen order.

    Raises:
        IndexError: If a row is an empty sequence.
    """
    frequency_table = {}
    for row in dataset:
        name = row[0]
        # dict.get supplies 0 for a name not seen yet, so a single
        # statement covers both the "first time" and "seen before" cases.
        frequency_table[name] = frequency_table.get(name, 0) + 1
    return frequency_table
# Map every first-column value in goog_data to the number of rows it occurs in.
freq_table = find_frequencies(goog_data)

# This tallying step could be moved into a function of its own.
duplicate = {}        # names occurring more than once -> their occurrence count
total_duplicates = 0  # surplus rows: every occurrence beyond the first, per name
total = 0             # sum of all counts, i.e. number of rows counted
for item, count in freq_table.items():
    total += count
    if count > 1:
        duplicate[item] = count
        total_duplicates += count - 1  # first count is unique

# By construction: total == len(freq_table) + total_duplicates.
print('Number of Duplicate Apps: ', total_duplicates)
print('\n')
print('Number of unique Apps: ', len(freq_table))
print('\n')
print('Total number of Apps: ', total)