'''
Guided Project: Profitable App Profiles For The App Store And Google Play Markets
REMOVING DUPLICATE ENTRIES - PART 2
I used the following solution instead of the suggested dictionary version.
This was my first code in Python.
Please critique ...
'''
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print the rows ``dataset[start:end]`` and, optionally, the dataset size.

    Args:
        dataset: A list of rows, each row being a sequence of fields.
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the total number of rows and
            the number of columns, the latter measured on the first row.
    """
    for row in dataset[start:end]:
        print(row)
        # print() appends its own newline, so this leaves two blank lines
        # after each row.
        print('\n')
    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError.
        print('Number of columns:', len(dataset[0]) if dataset else 0)
# itemgetter is not used in the visible part of the script; the import is kept
# in case later code relies on it.
from operator import itemgetter
from csv import reader

# Read each CSV file completely into a list of rows (the first row is the
# header).  The `with` blocks close the files as soon as the rows are in
# memory; previously both handles were left open for the life of the process.
# newline='' is what the csv module asks for so that newlines embedded in
# quoted fields are parsed correctly.
# NOTE(review): encoding='utf8' assumes both files are UTF-8 encoded — confirm.
with open('AppleStore.csv', encoding='utf8', newline='') as apple_file:
    r_apple_file = reader(apple_file)
    apple_data = list(r_apple_file)

with open('googleplaystore.csv', encoding='utf8', newline='') as google_store:
    r_google_store = reader(google_store)
    google_data = list(r_google_store)
#print(google_data[0:1]) # get the column names
def _rank_value(row):
    """Return ``row[3]`` as a float so that rows can be compared numerically.

    The csv reader yields every field as a string, and comparing strings
    orders them character by character (so '9' > '10').  A row whose field is
    missing or not numeric ranks lowest (0.0) instead of raising.

    NOTE(review): the original code called this value `size`; in the Google
    Play CSV column 3 is presumably the review count — confirm against the
    header row.
    """
    try:
        return float(row[3])
    except (IndexError, ValueError):
        return 0.0


# Walk the data rows once (the header at index 0 is skipped) and split them
# into the first occurrence of each app name and every later repeat.  For each
# name, remember the row with the highest column-3 value; on a tie the earlier
# row wins.
duplicate_apps = []
duplicate_data_set = []
unique_data_set = []
unique_apps = []
best_row_by_name = {}  # dict lookup replaces the linear `name in list` scan
for app in google_data[1:]:
    name = app[0]
    if name in best_row_by_name:
        duplicate_apps.append(name)
        duplicate_data_set.append(app)
        if _rank_value(app) > _rank_value(best_row_by_name[name]):
            best_row_by_name[name] = app
    else:
        unique_apps.append(name)
        unique_data_set.append(app)
        best_row_by_name[name] = app

# `del loop_variable` only unbinds the name and never removes the row from the
# list, so the cleaned data is built as a new list instead: the header row
# followed by one row per app name, in first-appearance order.  google_data
# itself is left untouched.
google_clean = google_data[:1] + [best_row_by_name[name] for name in unique_apps]

# Every repeated row is dropped from the cleaned data, so the number of rows
# removed equals the number of repeats found.
count = len(duplicate_data_set)

# All figures are computed from the data rather than hard-coded.  Both lengths
# include the header row when the file has one.
print('the number of duplicate app names removed = ' + str(count))
print('raw google_data Length = ' + str(len(google_data)))
print('the count of clean android data is {} - {} = {}'.format(
    len(google_data), count, len(google_data) - count))
print('google_clean Length = ' + str(len(google_clean)))
print('count = ' + str(count))
print('duplicate_data_set = ' + str(len(duplicate_data_set)))
print('# of duplicate apps =' + str(len(duplicate_apps)))
'''
Output recorded from the original run:
the number of duplicate app names removed = 983
raw google_data Length = 10842
the count of clean android data is 10842 - 983 = 9,659
google_data Length = 10842
count = 983
duplicate_data_set = 1181
# of duplicate apps =1181
'''
# 1 Like  (forum-page residue, not part of the script)