TypeError: cannot safely cast non-equivalent float64 to int64

Hey,

I'm new to coding and I'm trying AutoClean on a dataset, but I keep getting this error: `TypeError: cannot safely cast non-equivalent float64 to int64`.
According to ChatGPT, this error typically occurs when you try to convert a floating-point number to an integer using the "int()" function or a similar method, but the float is not a whole number, so the conversion would lose precision.
But it must be possible to use floats as well, right?
So I'm curious why I might be getting this error. My code is provided below.
Thanks a lot for any help!

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv('/Users/guyjansen/Desktop/Python/Housing Prices Data Science Project/train.csv')
from AutoClean.autoclean import AutoClean
pipeline = AutoClean(df)
pipeline.output

This raises the error:
--------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in safe_cast(values, dtype, copy)
    119     try:
--> 120         return values.astype(dtype, casting="safe", copy=copy)
    121     except TypeError as err:

TypeError: Cannot cast array data from dtype('float64') to dtype('int64') according to the rule 'safe'

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
/var/folders/wc/2vn5bk3x4hq0b0_hdn9tjzkm0000gn/T/ipykernel_45950/1120075175.py in <module>
      1 from AutoClean.autoclean import AutoClean
----> 2 pipeline = AutoClean(df)
      3 pipeline.output

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/autoclean.py in __init__(self, input_data, mode, duplicates, missing_num, missing_categ, encode_categ, extract_datetime, outliers, outlier_param, logfile, verbose)
     80 
     81         # initialize our class and start the autoclean process
---> 82         self.output = self._clean_data(output_data, input_data)
     83 
     84         end = timer()

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/autoclean.py in _clean_data(self, df, input_data)
    141         df = Duplicates.handle(self, df)
    142         df = MissingValues.handle(self, df)
--> 143         df = Outliers.handle(self, df)
    144         df = Adjust.convert_datetime(self, df)
    145         df = EncodeCateg.handle(self, df)

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/modules.py in handle(self, df)
    272 
    273             if self.outliers in ['auto', 'winz']:
--> 274                 df = Outliers._winsorization(self, df)
    275             elif self.outliers == 'delete':
    276                 df = Outliers._delete(self, df)

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/modules.py in _winsorization(self, df)
    300                     else:
    301                         if (df[feature].fillna(-9999) % 1  == 0).all():
--> 302                             df.loc[row_index, feature] = upper_bound
    303                             df[feature] = df[feature].astype(int)
    304                         else:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
    714 
    715         iloc = self if self.name == "iloc" else self.obj.iloc
--> 716         iloc._setitem_with_indexer(indexer, value, self.name)
    717 
    718     def _validate_key(self, key, axis: int):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value, name)
   1689         if take_split_path:
   1690             # We have to operate column-wise
-> 1691             self._setitem_with_indexer_split_path(indexer, value, name)
   1692         else:
   1693             self._setitem_single_block(indexer, value, name)

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name)
   1782             # scalar value
   1783             for loc in ilocs:
-> 1784                 self._setitem_single_column(loc, value, pi)
   1785 
   1786     def _setitem_with_indexer_2d_value(self, indexer, value):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_single_column(self, loc, value, plane_indexer)
   1888 
   1889             orig_values = ser._values
-> 1890             ser._mgr = ser._mgr.setitem((pi,), value)
   1891 
   1892             if ser._values is orig_values:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in setitem(self, indexer, value)
    335         For SingleBlockManager, this backs s[indexer] = value
    336         """
--> 337         return self.apply("setitem", indexer=indexer, value=value)
    338 
    339     def putmask(self, mask, new, align: bool = True):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs)
    302                     applied = b.apply(f, **kwargs)
    303                 else:
--> 304                     applied = getattr(b, f)(**kwargs)
    305             except (TypeError, NotImplementedError):
    306                 if not ignore_failures:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py in setitem(self, indexer, value)
   1620 
   1621         check_setitem_lengths(indexer, value, self.values)
-> 1622         self.values[indexer] = value
   1623         return self
   1624 

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py in __setitem__(self, key, value)
    222         if _is_scalar:
    223             value = [value]
--> 224         value, mask = self._coerce_to_array(value)
    225 
    226         if _is_scalar:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in _coerce_to_array(self, value)
    334 
    335     def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
--> 336         return coerce_to_array(value, dtype=self.dtype)
    337 
    338     @overload

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in coerce_to_array(values, dtype, mask, copy)
    228         values = values.astype(dtype, copy=copy)
    229     else:
--> 230         values = safe_cast(values, dtype, copy=False)
    231 
    232     return values, mask

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in safe_cast(values, dtype, copy)
    124             return casted
    125 
--> 126         raise TypeError(
    127             f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
    128         ) from err

TypeError: cannot safely cast non-equivalent float64 to int64
​


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

TypeError: cannot safely cast non-equivalent float64 to int64 #12

this raises the error:

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

TypeError: cannot safely cast non-equivalent float64 to int64 #12

Description

this raises the error:

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions