Every line of 'pandas read csv only specific columns' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.
def pandas_read_csv(self, usecols=None, **kwargs):
    """Call ``pandas.read_csv`` on ``self.path`` with arguments derived from this object.

    The dtypes and the date columns come from ``dshape_to_pandas(self.schema)``;
    column names, encoding, header handling and dialect options come from
    ``self``.  Any of ``names``, ``dtype``, ``parse_dates``, ``encoding`` and
    ``header`` may be overridden through ``kwargs``; remaining keyword
    arguments are merged with ``clean_dialect(self.dialect)`` and forwarded.

    Parameters
    ----------
    usecols : sequence, optional
        Columns to read.  If every entry is an ``int`` the entries are
        translated through ``get(usecols, self.columns)`` (presumably into
        column names -- confirm against ``get``).  Date columns not selected
        are dropped from the ``parse_dates`` default.
    header : bool or pandas-style header value, optional (via ``kwargs``)
        A ``bool`` is mapped to ``0`` (first row is the header) or ``None``
        (no header row).  Any other value is passed to pandas untouched.
        Defaults to ``self.header`` interpreted by truthiness.

    Returns
    -------
    Whatever ``pd.read_csv`` returns for the assembled arguments.
    """
    dtypes, dates = dshape_to_pandas(self.schema)

    if usecols:
        if builtins.all(isinstance(c, int) for c in usecols):
            usecols = get(usecols, self.columns)
        dates = [name for name in dates if name in usecols]

    # Honour an explicit ``header=`` override.  Previously the popped value
    # was immediately overwritten by ``self.header`` and silently ignored.
    if 'header' in kwargs:
        header = kwargs.pop('header')
        if isinstance(header, bool):
            header = 0 if header else None
    else:
        header = 0 if self.header else None

    result = pd.read_csv(self.path,
                         names=kwargs.pop('names', self.columns),
                         usecols=usecols,
                         # Unknown extensions map to None (no compression).
                         compression={'gz': 'gzip',
                                      'bz2': 'bz2'}.get(ext(self.path)),
                         dtype=kwargs.pop('dtype', dtypes),
                         parse_dates=kwargs.pop('parse_dates', dates),
                         encoding=kwargs.pop('encoding', self.encoding),
                         header=header,
                         **merge(kwargs, clean_dialect(self.dialect)))

    return result
def readcsv(filename, header=True):
    """Load a CSV file into a pandas DataFrame.

    When ``header`` is truthy the file is read with pandas' default header
    handling; otherwise it is read with ``header=None`` so that no row is
    treated as column names.
    """
    if header:
        return pd.read_csv(filename)
    return pd.read_csv(filename, header=None)
63 def _dataframe_from_csv(reader, delimiter, with_header, skipspace): 64 """Returns csv data as a pandas Dataframe object""" 65 sep = delimiter 66 header = 0 67 if not with_header: 68 header = None 69 70 return pd.read_csv( 71 reader, 72 header=header, 73 sep=sep, 74 skipinitialspace=skipspace, 75 encoding='utf-8-sig' 76 )
def _csv_to_pandas_df(filepath,
                      separator=DEFAULT_SEPARATOR,
                      quote_char=DEFAULT_QUOTE_CHARACTER,
                      escape_char=DEFAULT_ESCAPSE_CHAR,
                      contain_headers=True,
                      lines_to_skip=0,
                      date_columns=None,
                      rowIdAndVersionInIndex=True):
    """Read a CSV file into a pandas DataFrame, optionally re-labelling rows.

    :param filepath:               file passed to ``pd.read_csv``
    :param separator:              field separator (``sep``)
    :param quote_char:             quoting character (``quotechar``)
    :param escape_char:            escape character (``escapechar``)
    :param contain_headers:        if truthy the first row is the header
    :param lines_to_skip:          forwarded as ``skiprows``
    :param date_columns:           columns to parse as dates; a falsy value
                                   means no date columns
    :param rowIdAndVersionInIndex: if truthy and both ``ROW_ID`` and
                                   ``ROW_VERSION`` columns exist, they (and
                                   ``ROW_ETAG`` when present) are folded into
                                   the index and removed from the columns
    :return: the resulting DataFrame
    """
    test_import_pandas()
    import pandas as pd

    # DATE values are stored in the csv as unix timestamps in milliseconds.
    # NOTE(review): ``date_parser`` is deprecated in pandas >= 2.0 -- consider
    # converting the date columns after the read instead; verify before changing.
    def parse_epoch_millis(milliseconds):
        return pd.to_datetime(milliseconds, unit='ms', utc=True)

    date_columns = date_columns or []

    # Only a single-character platform line separator is handed to pandas;
    # otherwise pandas is left to use its default.
    platform_newline = str(os.linesep)
    terminator = platform_newline if len(platform_newline) == 1 else None

    df = pd.read_csv(filepath,
                     sep=separator,
                     lineterminator=terminator,
                     quotechar=quote_char,
                     escapechar=escape_char,
                     header=0 if contain_headers else None,
                     skiprows=lines_to_skip,
                     parse_dates=date_columns,
                     date_parser=parse_epoch_millis)

    if (not rowIdAndVersionInIndex
            or "ROW_ID" not in df.columns
            or "ROW_VERSION" not in df.columns):
        return df

    # Build new row labels from the row id, the row version and, when
    # available, the row etag, then drop those bookkeeping columns.
    label_columns = ["ROW_ID", "ROW_VERSION"]
    if "ROW_ETAG" in df.columns:
        label_columns.append('ROW_ETAG')

    df.index = row_labels_from_id_and_version(
        zip(*[df[column] for column in label_columns]))
    for column in label_columns:
        del df[column]

    return df
114 def _pandas_read_csv(filepath, **kwargs): 115 """ 116 Wrapper function around the Pandas read_csv function. 117 :param filepath: The file to read. 118 :type filepath: str, StringIO 119 :param kwargs: Extra key word arguments to be applied. 120 :return: A pandas DataFrame. 121 :rtype: pandas.DataFrame 122 """ 123 try: 124 return pd.read_csv(filepath, **kwargs) 125 except FileNotFoundError: 126 raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filepath) 127 except Exception as error: 128 raise error
def _read_csv_sparse(filename, chunksize=1000000, fill_value=0.0, **kwargs):
    """Read a csv file chunk by chunk and concatenate the converted chunks.

    The file is read in pieces of ``chunksize`` rows; each piece is passed
    through ``utils.dataframe_to_sparse`` with ``fill_value`` and the results
    are concatenated with ``pd.concat``.  Extra keyword arguments are
    forwarded to ``pd.read_csv``.
    """
    def sparse_pieces():
        # Lazily convert one chunk at a time so only the converted pieces
        # are accumulated by the concatenation.
        for piece in pd.read_csv(filename, chunksize=chunksize, **kwargs):
            yield utils.dataframe_to_sparse(piece, fill_value=fill_value)

    return pd.concat(sparse_pieces())