As far as I can tell from my testing, the institute_service column doesn't have enough NaN values for the dropna call with thresh=500 to have removed it, so I can't figure out why it's not there any more.
dete_resignations_up['institute']='dete'
tafe_resignations_up['institute']='tafe'
combined = pd.concat([dete_resignations_up, tafe_resignations_up],join = 'outer', axis=0)
combined_updated=combined.dropna(axis='columns',thresh=500)
print(combined_updated.head(3))
combined_updated['institute_service'].value_counts(dropna=False)
This returns the following error message:
KeyErrorTraceback (most recent call last)
/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'institute_service'
During handling of the above exception, another exception occurred:
KeyErrorTraceback (most recent call last)
<ipython-input-62-7967c96ca7f6> in <module>()
----> 1 combined_updated['institute_service'].value_counts(dropna=False)
/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/frame.py in __getitem__(self, key)
2137 return self._getitem_multilevel(key)
2138 else:
-> 2139 return self._getitem_column(key)
2140
2141 def _getitem_column(self, key):
/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2144 # get column
2145 if self.columns.is_unique:
-> 2146 return self._get_item_cache(key)
2147
2148 # duplicate columns & possible reduce dimensionality
/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1840 res = cache.get(item)
1841 if res is None:
-> 1842 values = self._data.get(item)
1843 res = self._box_item_values(item, values)
1844 cache[item] = res
/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3841
3842 if not isna(item):
-> 3843 loc = self.items.get_loc(item)
3844 else:
3845 indexer = np.arange(len(self.items))[isna(self.items)]
/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2525 return self._engine.get_loc(key)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
2529 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'institute_service'
Edit:
After testing, I fixed dete_survey_updated['institute_service'] somewhat, but now I have 549 NaT values that I don't understand.
dete_survey_updated['institute_service']=(dete_survey_updated.loc[:,'cease_date'] - dete_survey_updated.loc[:,'dete_start_date'])
print(dete_survey_updated['institute_service'].value_counts(dropna=False))
This returns:
NaT 549
15339 days 23:59:59.999997 14
15339 days 23:59:59.999997 11
15339 days 23:59:59.999997 9
15705 days 23:59:59.999997 8
15339 days 23:59:59.999997 8
15339 days 23:59:59.999997 7
15339 days 23:59:59.999997 7
15705 days 23:59:59.999997 6
15705 days 23:59:59.999997 5
15705 days 23:59:59.999997 5
15339 days 23:59:59.999997 5
15705 days 23:59:59.999997 4
15705 days 23:59:59.999997 4
15339 days 23:59:59.999997 4
15705 days 23:59:59.999998 4
15705 days 23:59:59.999997 3
15705 days 23:59:59.999997 3
16070 days 23:59:59.999997 3
15339 days 23:59:59.999998 3
15339 days 23:59:59.999998 3
15948 days 23:59:59.999997 3
15886 days 23:59:59.999997 3
16070 days 23:59:59.999997 3
15339 days 23:59:59.999998 3
16039 days 23:59:59.999997 3
15339 days 23:59:59.999997 3
15339 days 23:59:59.999998 3
15339 days 23:59:59.999998 2
15339 days 23:59:59.999997 2
...
15339 days 23:59:59.999998 1
15339 days 23:59:59.999998 1
15339 days 23:59:59.999998 1
15339 days 23:59:59.999998 1
16009 days 23:59:59.999997 1
16009 days 23:59:59.999997 1
16009 days 23:59:59.999997 1
16009 days 23:59:59.999997 1
15705 days 23:59:59.999998 1
15705 days 23:59:59.999998 1
15339 days 23:59:59.999998 1
16009 days 23:59:59.999998 1
16039 days 23:59:59.999998 1
16039 days 23:59:59.999998 1
16039 days 23:59:59.999998 1
16039 days 23:59:59.999997 1
16039 days 23:59:59.999997 1
16039 days 23:59:59.999997 1
16039 days 23:59:59.999997 1
16070 days 23:59:59.999998 1
15948 days 23:59:59.999998 1
15886 days 23:59:59.999998 1
15948 days 23:59:59.999998 1
15948 days 23:59:59.999998 1
15948 days 23:59:59.999998 1
15948 days 23:59:59.999998 1
15948 days 23:59:59.999998 1
15948 days 23:59:59.999997 1
15886 days 23:59:59.999997 1
15886 days 23:59:59.999997 1
Name: institute_service, Length: 135, dtype: int64