#####################################################################################################
#This R script works with state-specific company data downloaded from the https://data.gov.in/ portal.
#I have chosen Karnataka company data, which I downloaded into the 'Kar_Ind_Master.csv' file.
#####################################################################################################
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.5
library('xlsx') 
## Loading required package: rJava
## Loading required package: xlsxjars
# Load the company master list from 'Kar_Ind_Master.csv' in the current
# working directory. The `header` argument is spelled out in full: the
# previous `head = TRUE` only worked through R's partial argument matching.
Kar_All_Ind <- read.csv(file = "Kar_Ind_Master.csv", header = TRUE, sep = ",")
# Number of rows and columns that were read (recorded output below).
dim(Kar_All_Ind)
## [1] 78524    13
# Column names available for the filtering and plotting steps further down
# (recorded output below).
names(Kar_All_Ind)
##  [1] "CORPORATE_IDENTIFICATION_NUMBER" "DATE_OF_REGISTRATION"           
##  [3] "COMPANY_NAME"                    "COMPANY_STATUS"                 
##  [5] "COMPANY_CLASS"                   "COMPANY_CATEGORY"               
##  [7] "AUTHORIZED_CAPITAL"              "PAIDUP_CAPITAL"                 
##  [9] "REGISTERED_STATE"                "REGISTRAR_OF_COMPANIES"         
## [11] "PRINCIPAL_BUSINESS_ACTIVITY"     "REGISTERED_OFFICE_ADDRESS"      
## [13] "SUB_CATEGORY"
# Keep only the rows whose COMPANY_STATUS is one of the statuses listed
# below; rows with any other status (e.g. dormant or inactive companies)
# are dropped. `%in%` never yields NA, so rows with a missing status are
# simply excluded.
Only_Active__Kar_All_Ind <- Kar_All_Ind[
  Kar_All_Ind$COMPANY_STATUS %in%
    c("ACTIVE", "ACTIVE IN PROGRESS", "AMALGAMATED", "CONVERTED TO LLP"),
]
# Size of the retained subset (recorded output below).
dim(Only_Active__Kar_All_Ind)
## [1] 54382    13
# Keep only the companies whose registered office address contains the
# upper-case text "BANGALORE" (the match is case-sensitive).
Active_KN_Subset <- Only_Active__Kar_All_Ind[
  grepl("BANGALORE", Only_Active__Kar_All_Ind$REGISTERED_OFFICE_ADDRESS),
]
# Plot the Bangalore companies by principal business activity. The constant
# x value "Business" puts every row into a single bar, which geom_bar() then
# splits by fill colour, one segment per activity.
ggplot(
  data = Active_KN_Subset,
  mapping = aes(x = "Business", fill = PRINCIPAL_BUSINESS_ACTIVITY)
) +
  geom_bar()