107MP
0
Q:

clean crsp pandas

# How to clean CRSP the WRDS database, to calculate Mkt Cap on non financial US firms

CRSP = CRSP[CRSP['SHRCLS'].isin(['NaN', 'A'])] # Keep Share class A or missing 
CRSP = CRSP[CRSP['SHRCD'].isin(['10', '11'])] # Keep only sharecode of 10 and 11
CRSP['RET'] = CRSP['RET'].replace(['C','B'],np.nan) # Clean return taking out strings 

# Keep value above -50 to avoid any errors 
CRSP['RET'] = CRSP['RET'].astype('float')
mask = CRSP['RET'] > -50
CRSP = CRSP[mask]

CRSP['PRC'] = CRSP['PRC'].abs() # Keep absolute value of Price 

# Calculate Market value with adjustment in Price and Market Value
CRSP['market_value'] = ((CRSP['PRC']/CRSP['CFACPR'])*(CRSP['SHROUT']*CRSP['CFACSHR'])).shift(1)

# Take out SIC code with letter z and keep only rows with a SIC 
mask_z = CRSP['SICCD'] == 'Z'
CRSP['SICCD'] = CRSP['SICCD'][-mask_z]
CRSP['SICCD'] = CRSP['SICCD'].dropna().astype(int)

# Keep non financial firms only
CRSP = CRSP[~CRSP['SICCD'].between(6000,6999)]

# Change date type from object to datetime and create new colums for month and year
CRSP['date'] = pd.to_datetime(CRSP['date'])
CRSP['year'] = pd.DatetimeIndex(CRSP['date']).year
CRSP['month'] = pd.DatetimeIndex(CRSP['date']).month

# Clean CRSP
CRSP.drop(['SHRCD','SHRCLS','PRC','SHROUT','CFACPR','CFACSHR'], axis=1, inplace = True)
1

New to Communities?

Join the community