Why does this give an error?

Screen Link:
https://app.dataquest.io/m/293/data-cleaning-basics/12/challenge-clean-a-string-column

My Code:

# Strip the unit suffix from the weight strings in a single pass.
# Removing "kg" before "kgs" would leave a stray "s" behind
# (e.g. "4kgs" -> "4s"), which a later replace of "kgs" can no longer match
# and which makes the float conversion fail. The pattern "kgs?" removes
# "kg" together with an optional trailing "s".
laptops["weight"] = laptops["weight"].str.replace(r"kgs?", "", regex=True)
# With the unit text gone, the remaining values are cast to float.
laptops["weight"] = laptops["weight"].astype(float)

What I expected to happen:

What actually happened:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-097fc3737aff> in <module>
     42 laptops["weight"] = laptops["weight"].str.replace("kg","")
     43 laptops["weight"] = laptops["weight"].str.replace("kgs","")
---> 44 laptops["weight"] = laptops["weight"].astype(float)
     45 laptops.rename({"weight": "weight_kg"}, axis=1, inplace=True)
     46 laptops.to_csv('laptops_cleaned',index=False)

/dataquest/system/env/python3/lib/python3.8/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors)
   5696         else:
   5697             # else, only a single dtype is given
-> 5698             new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
   5699             return self._constructor(new_data).__finalize__(self)
   5700 

/dataquest/system/env/python3/lib/python3.8/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors)
    580 
    581     def astype(self, dtype, copy: bool = False, errors: str = "raise"):
--> 582         return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
    583 
    584     def convert(self, **kwargs):

/dataquest/system/env/python3/lib/python3.8/site-packages/pandas/core/internals/managers.py in apply(self, f, filter, **kwargs)
    440                 applied = b.apply(f, **kwargs)
    441             else:
--> 442                 applied = getattr(b, f)(**kwargs)
    443             result_blocks = _extend_blocks(applied, result_blocks)
    444 

/dataquest/system/env/python3/lib/python3.8/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors)
    623             vals1d = values.ravel()
    624             try:
--> 625                 values = astype_nansafe(vals1d, dtype, copy=True)
    626             except (ValueError, TypeError):
    627                 # e.g. astype_nansafe can fail on object-dtype of strings

/dataquest/system/env/python3/lib/python3.8/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
    895     if copy or is_object_dtype(arr) or is_object_dtype(dtype):
    896         # Explicit copy, or required since NumPy can't view from / to object.
--> 897         return arr.astype(dtype, copy=True)
    898 
    899     return arr.view(dtype)

ValueError: could not convert string to float: '4s'

I figured it out.
Of course: when “kg” is removed first, the rows containing “kgs” are left with a stray “s” (e.g. “4kgs” becomes “4s”), which the next “replace” command cannot remove because it looks for “kgs”. Replacing “kgs” before “kg” fixes the error. :upside_down_face:

1 Like