Guided Project: Clean And Analyze Employee Exit Surveys, 6, creation of institute_service column, and thresh

To the best of my testing, the institute_service column doesn't have enough NaN values for the dropna + thresh call to have removed it, yet it is missing from the result. I can't figure out why it's no longer there.

# Tag every row with its source survey so the origin survives the concat.
dete_resignations_up['institute'] = 'dete'
tafe_resignations_up['institute'] = 'tafe'

# Stack the two frames row-wise. pd.concat defaults to an outer join along
# axis 0, so the result carries the union of both frames' columns.
combined = pd.concat([dete_resignations_up, tafe_resignations_up])

# Keep only the columns that have at least 500 non-NA values.
# NOTE(review): thresh counts non-NA values, not NaNs. If 'institute_service'
# exists in only one of the two input frames, its non-NA count in `combined`
# may fall below 500 and the column is dropped here -- check with
# combined['institute_service'].notnull().sum() before this line.
combined_updated = combined.dropna(axis=1, thresh=500)
print(combined_updated.head(3))
combined_updated['institute_service'].value_counts(dropna=False)

This returns the following error message:

KeyError                                  Traceback (most recent call last)
/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2524             try:
-> 2525                 return self._engine.get_loc(key)
   2526             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'institute_service'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-62-7967c96ca7f6> in <module>()
----> 1 combined_updated['institute_service'].value_counts(dropna=False)

/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2137             return self._getitem_multilevel(key)
   2138         else:
-> 2139             return self._getitem_column(key)
   2140 
   2141     def _getitem_column(self, key):

/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2144         # get column
   2145         if self.columns.is_unique:
-> 2146             return self._get_item_cache(key)
   2147 
   2148         # duplicate columns & possible reduce dimensionality

/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1840         res = cache.get(item)
   1841         if res is None:
-> 1842             values = self._data.get(item)
   1843             res = self._box_item_values(item, values)
   1844             cache[item] = res

/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3841 
   3842             if not isna(item):
-> 3843                 loc = self.items.get_loc(item)
   3844             else:
   3845                 indexer = np.arange(len(self.items))[isna(self.items)]

/dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2525                 return self._engine.get_loc(key)
   2526             except KeyError:
-> 2527                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2528 
   2529         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'institute_service'

Edit:
After testing, I somewhat fixed dete_survey_updated['institute_service'], but now I have 549 NaT values that I don't understand.

# Length of service = cease date minus start date, element-wise per row.
# NOTE(review): the result is timedelta-valued, so both columns appear to be
# datetime-like here; rows where either date is missing come out as NaT.
# The sub-microsecond remainders in the output (e.g. 23:59:59.999997) suggest
# dete_start_date may hold a year number that was parsed as nanoseconds since
# the epoch -- confirm the dtype of both columns before subtracting.
dete_survey_updated['institute_service'] = (
    dete_survey_updated['cease_date'] - dete_survey_updated['dete_start_date']
)

# Show the frequency of each service length, counting missing values too.
service_counts = dete_survey_updated['institute_service'].value_counts(dropna=False)
print(service_counts)

returns

NaT                           549
15339 days 23:59:59.999997     14
15339 days 23:59:59.999997     11
15339 days 23:59:59.999997      9
15705 days 23:59:59.999997      8
15339 days 23:59:59.999997      8
15339 days 23:59:59.999997      7
15339 days 23:59:59.999997      7
15705 days 23:59:59.999997      6
15705 days 23:59:59.999997      5
15705 days 23:59:59.999997      5
15339 days 23:59:59.999997      5
15705 days 23:59:59.999997      4
15705 days 23:59:59.999997      4
15339 days 23:59:59.999997      4
15705 days 23:59:59.999998      4
15705 days 23:59:59.999997      3
15705 days 23:59:59.999997      3
16070 days 23:59:59.999997      3
15339 days 23:59:59.999998      3
15339 days 23:59:59.999998      3
15948 days 23:59:59.999997      3
15886 days 23:59:59.999997      3
16070 days 23:59:59.999997      3
15339 days 23:59:59.999998      3
16039 days 23:59:59.999997      3
15339 days 23:59:59.999997      3
15339 days 23:59:59.999998      3
15339 days 23:59:59.999998      2
15339 days 23:59:59.999997      2
                             ... 
15339 days 23:59:59.999998      1
15339 days 23:59:59.999998      1
15339 days 23:59:59.999998      1
15339 days 23:59:59.999998      1
16009 days 23:59:59.999997      1
16009 days 23:59:59.999997      1
16009 days 23:59:59.999997      1
16009 days 23:59:59.999997      1
15705 days 23:59:59.999998      1
15705 days 23:59:59.999998      1
15339 days 23:59:59.999998      1
16009 days 23:59:59.999998      1
16039 days 23:59:59.999998      1
16039 days 23:59:59.999998      1
16039 days 23:59:59.999998      1
16039 days 23:59:59.999997      1
16039 days 23:59:59.999997      1
16039 days 23:59:59.999997      1
16039 days 23:59:59.999997      1
16070 days 23:59:59.999998      1
15948 days 23:59:59.999998      1
15886 days 23:59:59.999998      1
15948 days 23:59:59.999998      1
15948 days 23:59:59.999998      1
15948 days 23:59:59.999998      1
15948 days 23:59:59.999998      1
15948 days 23:59:59.999998      1
15948 days 23:59:59.999997      1
15886 days 23:59:59.999997      1
15886 days 23:59:59.999997      1
Name: institute_service, Length: 135, dtype: int64