diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 0000000..4baa8b8
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,19 @@
+name: Pytest  # CI workflow: build the conda environment and run the pytest suite
+
+on:  # no branch or path filters are set, so both triggers below fire for every event
+ push:
+ pull_request:
+
+jobs:
+ test-pandas-read-in:  # single job in this workflow
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ shell: bash -el {0}  # -e: stop on first failing command; -l: login shell, which setup-miniconda relies on to activate the env in `run` steps
+ steps:
+ - uses: actions/checkout@v4  # check out the repository so environment.yml and the tests are available
+ - uses: conda-incubator/setup-miniconda@v3  # installs Miniconda and creates the environment described below
+ with:
+ environment-file: environment.yml  # conda environment spec; the file itself is not part of this diff
+ activate-environment: pv-code-examples  # NOTE(review): presumably must match the `name:` in environment.yml - confirm
+ - run: pytest  # no arguments: pytest uses its default test discovery from the working directory
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index b5f2b45..0f367fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,6 @@
-.ipynb_checkpoints
.DS_Store
+*.tsv
+*.zip
+*.pyc
+__pycache__
+.pytest_cache
\ No newline at end of file
diff --git a/01_bulk_download_example_joins/README.md b/01_bulk_download_example_joins/README.md
deleted file mode 100644
index 671a002..0000000
--- a/01_bulk_download_example_joins/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Bulk Download Files: Example Joins
-
-The code scripts in this folder walk through the joining of various bulk download files:
-
-| Script | Description |
-| --- |--- |
-| country_ipc_selection.R | *Demonstrates using ipc and location to select patents* |
-| join_assignee.Rmd | *Describes joining of assignee, location, and patent tables*|
-| join_assignee.html | *HTML version of join_assignee.Rmd*|
-| join_inventor.Rmd | *Describes joining of inventor, location, and patent tables*|
-| join_inventor.html | *HTML version of join_inventor.Rmd*|
-
-Note:
-
-You will need to set your working directory/folder path in the r setup block in the .Rmd scripts above. Here is the line you will need to change:
-
- knitr::opts_knit$set(root.dir = "TODO: ADD DIRECTORY PATH HERE")
\ No newline at end of file
diff --git a/01_bulk_download_example_joins/country_ipc_selection.R b/01_bulk_download_example_joins/country_ipc_selection.R
deleted file mode 100755
index 964bf51..0000000
--- a/01_bulk_download_example_joins/country_ipc_selection.R
+++ /dev/null
@@ -1,77 +0,0 @@
-# imports
-library(fastmatch)
-library(dplyr)
-library(tidyr)
-library(stringr)
-
-
-# download and unzip relevant files from bulk download site
-# for additional file links, see site: https://patentsview.org/download/data-download-tables
-ipc_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/ipcr.tsv.zip"
-patent_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/patent.tsv.zip"
-raw_location_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/rawlocation.tsv.zip"
-other_applicant_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/non_inventor_applicant.tsv.zip"
-pat_asgn_url <- "https://s3.amazonaws.com/data.patentsview.org/download/patent_assignee.tsv.zip"
-pat_inv_url <- "https://s3.amazonaws.com/data.patentsview.org/download/patent_inventor.tsv.zip"
-
-dl_folder = "Path/To/My/Download/Folder"
-
-for (link in c(ipc_table_url, patent_table_url, raw_location_table_url, other_applicant_table_url, pat_asgn_url, pat_inv_url)) {
- fnam = tail(str_split(link, '/')[[1]], 1)
- download.file(link, paste0(dl_folder,'/',fnam), method = "curl")
- unzip(paste0(dl_folder,'/',fnam), exdir = dl_folder)
-}
-
-# faster %in% implementation to speed up repeated use below
-# credit: https://stackoverflow.com/questions/32934933/faster-in-operator
-`%fin%` <- function(x, table) {
- stopifnot(require(fastmatch))
- fmatch(x, table, nomatch = 0L) > 0L
-}
-
-#read in files and start filtering
-#start with location and build from there
-locs <- read.table(file = paste0(dl_folder,'/rawlocation.tsv'), header = T, sep = '\t') %>%
- select(id, location_id, country) %>% #only need id and country columns
- filter(country == "IN") # ISO alpha-2 code for India
-# for other country codes, see https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
-
-# next pick out the inventors, applicants, and assignees who have one of these location ids
-invs_in_india <- read.table(file = paste0(dl_folder, '/patent_inventor.tsv'), header = T, sep = '\t', stringsAsFactors = F) %>%
- filter(location_id %fin% locs$location_id)
-
-other_applics_in_india <- read.table(file = paste0(dl_folder, '/non_inventor_applicant.tsv'), header = T, sep = '\t', stringsAsFactors = F) %>%
- select(patent_id, rawlocation_id) %>%
- filter(rawlocation_id %fin% locs$id)
-
-asgns_in_india <- read.table(file = paste0(dl_folder, '/patent_assignee.tsv'), header = T, sep = '\t', stringsAsFactors = F) %>%
- filter(location_id %fin% locs$location_id)
-
-#now we can combine these lists of patents and remove duplicates
-
-country_patlist <- c(invs_in_india$patent_id, asgns_in_india$patent_id, other_applics_in_india$patent_id)
-country_patlist <- country_patlist[!duplicated(country_patlist)]
-
-# you can now optionally delete the above data frames to clear up some memory
-rm(invs_in_india, other_applics_in_india, asgns_in_india)
-
-# next we'll filter these to the ones with desired IPC codes
-good_ipc3 <- c('C07', 'C08', 'C12')
-good_ipc4 <- c('A61K', 'A61P', 'C40B')
-
-final_patlist <- read.table(file = paste0(dl_folder,'/ipcr.tsv'), header = T, sep = '\t') %>%
- select(patent_id, section, ipc_class, subclass) %>%
- filter(patent_id %fin% country_patlist) %>%
- mutate(ipc3 = paste0(section,ipc_class), ipc4 = paste0(section,ipc_class,subclass)) %>%
- filter((ipc3 %fin% good_ipc3)|(ipc4 %fin% good_ipc4)) %>%
- select(patent_id)
-
-# this should be a complete list of the patents that match your desired country and IPC codes
-# from here you should be able to join any additional tables to get your desired full dataset
-# e.g.
-
-mydata <- patlist %>%
- merge(read.table(file = paste0(dl_folder, '/patent.tsv'), header = T, sep = '\t'), by.x=patent_id, by.y=id, all.x=T)
-
-# and export if desired:
-write.csv(mydata, paste0(dl_folder,'/mydata.csv'), row.names = F)
\ No newline at end of file
diff --git a/01_bulk_download_example_joins/join_assignee.Rmd b/01_bulk_download_example_joins/join_assignee.Rmd
deleted file mode 100644
index dc67996..0000000
--- a/01_bulk_download_example_joins/join_assignee.Rmd
+++ /dev/null
@@ -1,124 +0,0 @@
----
-title: "Merge Assignee, Location, and Patent Tables"
-output: html_document
----
-#### Note: This HTML document was last generated on `r Sys.Date()`
-
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_knit$set(root.dir = "TODO: Add Directory Path Here")
-
-library(data.table)
-library(scales)
-```
-
-## Load Patent Table
-```{r, warning=FALSE }
-patent <- fread("patent.tsv")
-colnames(patent)[colnames(patent)=="type"] <- "patent_type"
-colnames(patent)[colnames(patent)=="country"] <- "patent_country"
-colnames(patent)[colnames(patent)=="id"] <- "patent_id"
-head(patent)
-```
-
-## Load Assignee and Location Tables
-```{r}
-assignee <- fread("assignee.tsv")
-location <- fread("location.tsv")
-
-```
-```{r}
-head(assignee)
-head(location)
-
-```
-
-## Load Raw Location and Raw Assignee Tables
-```{r, warning=FALSE}
-
-rawassignee <- fread("rawassignee.tsv")
-rawlocation <- fread("rawlocation.tsv")
-
-```
-```{r}
-head(rawassignee)
-head(rawlocation)
-```
-
-## Join Raw Assignee and Assignee Tables
-```{r}
-merged_assignees <- merge(rawassignee, assignee, by.x = c('assignee_id', 'type', 'name_first', 'name_last', 'organization'), by.y = c('id','type', 'name_first', 'name_last', 'organization'))
-```
-
-```{r}
-colnames(merged_assignees)[colnames(merged_assignees)=="type"] <- "assignee_type"
-
-head(merged_assignees)
-
-n_non_matches <- nrow(rawassignee) - nrow(merged_assignees)
-
-```
-
-The assignee table contains `r comma_format()(nrow(assignee))` rows while the rawassignee table contains `r comma_format()(nrow(rawassignee))` rows. The merging of these tables results in a table with `r comma_format()(nrow(merged_assignees))` rows. Thus there are `r comma_format()(n_non_matches)` rows in the rawassignee table which do not contain matches to the assignee table on the specified columns.
-
-## Join Raw Location and Location Tables
-```{r}
-head(location)
-```
-
-```{r}
-colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id"
-head(rawlocation)
-```
-
-
-```{r}
-merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country'))
-```
-
-
-
-```{r}
-head(merged_locations)
-
-n_non_matches <- nrow(rawlocation) - nrow(merged_locations)
-
-```
-
-
-The location table contains `r comma_format()(nrow(location))` rows while the rawlocation table contains `r comma_format()(nrow(rawlocation))`rows. Merging these tables results in a merged_locations table of `r comma_format()(nrow(merged_locations))` rows. Thus there are `r comma_format()(n_non_matches)` rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table.
-
-## Join Merged_Locations and Merged_Assignees
-```{r}
-merged_assignee_location <- merge(merged_locations, merged_assignees, by.x = 'rawlocation_id', by.y = 'rawlocation_id')
-```
-
-```{r}
-colnames(merged_assignee_location)[colnames(merged_assignee_location)=="type"] <- "assignee_type"
-head(merged_assignee_location)
-```
-
-## Join Assignee, Location, and Patent Data
-```{r}
-
-head(patent)
-```
-
-```{r}
-# free up some memory for final merge
-rm(rawassignee)
-rm(rawlocation)
-rm(assignee)
-rm(location)
-rm(merged_locations)
-
-complete_merge <- merge(patent, merged_assignee_location, by.x = 'patent_id', by.y = 'patent_id' )
-```
-
-```{r}
-head(complete_merge)
-```
-
-The merged_assignee_location table contains `r comma_format()(nrow(merged_assignee_location))` rows and the patent table contains `r comma_format()(nrow(patent))` rows. The complete_merge contains `r comma_format()(nrow(complete_merge))` rows. Thus, all of the patent_ids in the merged_assignee_location table have matches in the patent table.
-
diff --git a/01_bulk_download_example_joins/join_assignee.html b/01_bulk_download_example_joins/join_assignee.html
deleted file mode 100644
index a817e41..0000000
--- a/01_bulk_download_example_joins/join_assignee.html
+++ /dev/null
@@ -1,558 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Note: This HTML document was last generated on 2020-01-14
-
-
-
Load Patent Table
-
patent <- fread("patent.tsv")
-colnames(patent)[colnames(patent)=="type"] <- "patent_type"
-colnames(patent)[colnames(patent)=="country"] <- "patent_country"
-colnames(patent)[colnames(patent)=="id"] <- "patent_id"
-head(patent)
-
## patent_id patent_type number patent_country date
-## 1: 10000000 utility 10000000 US 2018-06-19
-## 2: 10000001 utility 10000001 US 2018-06-19
-## 3: 10000002 utility 10000002 US 2018-06-19
-## 4: 10000003 utility 10000003 US 2018-06-19
-## 5: 10000004 utility 10000004 US 2018-06-19
-## 6: 10000005 utility 10000005 US 2018-06-19
-## abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2: The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3: The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4: The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5: The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6: A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-## title
-## 1: Coherent LADAR using intra-pixel quadrature detection
-## 2: Injection molding machine and mold thickness control method
-## 3: Method for manufacturing polymer film and co-extruded film
-## 4: Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6: Article vacuum formation method and vacuum forming apparatus
-## kind num_claims filename withdrawn
-## 1: B2 20 ipg180619.xml NULL
-## 2: B2 12 ipg180619.xml NULL
-## 3: B2 9 ipg180619.xml NULL
-## 4: B2 18 ipg180619.xml NULL
-## 5: B2 6 ipg180619.xml NULL
-## 6: B2 4 ipg180619.xml NULL
-
-
-
Load Assignee and Location Tables
-
assignee <- fread("assignee.tsv")
-location <- fread("location.tsv")
-
head(assignee)
-
## id type name_first name_last
-## 1: 004bvflbsd8k0pjiga6l1hdm2 2 NULL NULL
-## 2: 00aw1csuxll9yyqi67qq9m91q 2 NULL NULL
-## 3: 00gkqk53hjao0mroltu3oo1qk 2 NULL NULL
-## 4: 00grlqxhvjz0l7ylc5okvvtbg 2 NULL NULL
-## 5: 00iquis3fn9yu6i0n8ojxex4c 2 NULL NULL
-## 6: 00qivxg405hst268eg56psex6 2 NULL NULL
-## organization
-## 1: GM Global Technology Operations LLC
-## 2: AT&T Intellectual Property II, L.P.
-## 3: McAfee, Inc.
-## 4: Sequa Corporation
-## 5: CENTER CROSS ARCHERY LLC
-## 6: Exceptional IP Holdings, LLC
-
head(location)
-
## id city state country latitude longitude county
-## 1: 000ii62xlraz Esparto CA US 38.6922 -122.0160 Yolo
-## 2: 000iprvfzu2f Limm NULL GB 11.1000 30.5167 NULL
-## 3: 000ktsnnq96r Lauenburg/Elbe NULL DE 53.3763 10.5497 NULL
-## 4: 000osf8n2ysw Bouxwiller NULL FR 47.5051 7.3453 NULL
-## 5: 0014a0zfh259 Morance NULL FR 45.8978 4.7004 NULL
-## 6: 0018uf8e2x0f Dals Langed NULL SE 58.9167 12.3000 NULL
-## state_fips county_fips
-## 1: 6 6113
-## 2: NULL NULL
-## 3: NULL NULL
-## 4: NULL NULL
-## 5: NULL NULL
-## 6: NULL NULL
-
-
-
Load Raw Location and Raw Assignee Tables
-
rawassignee <- fread("rawassignee.tsv")
-rawlocation <- fread("rawlocation.tsv")
-
head(rawassignee)
-
## uuid patent_id assignee_id
-## 1: 0000p94wkezw94s8cz7dbxlvz 5856666 org_fijoKOoRhIzrkYzecWF9
-## 2: 00013vk881wap9u4mbo7lwwhp 5204210 org_UrbE3xev7LUsnuvRjbep
-## 3: 000192sn2u10kzpikl4s7h3r0 5302149 org_JcXwBlJtb1uvcPKHeaYX
-## 4: 0001ycvv6sz1ju07ss99nhxi1 9104354 org_7fE5f5nnY6dbOc3vSaXb
-## 5: 0001z7ws4m14aqdb3tv99u550 6584517 org_sj7olrHxASyJDNVGczBe
-## 6: 0002ca4n6l0hop2dycg28wx3g 6970439 org_E6m1uPIsSbg4FlEl54lY
-## rawlocation_id type name_first name_last
-## 1: orskbf54s58e97lkmw8na5rpx 2 NULL NULL
-## 2: mue862v5lcjdhzqqk86ei75kj 2 NULL NULL
-## 3: o1h9dqdv0yq7dt1b1vmrcal9h 3 NULL NULL
-## 4: rspbpqcajvm09r1ew9mgnpx37 3 NULL NULL
-## 5: l1gyelp5jcg0hakk9smmhsdgr 2 NULL NULL
-## 6: 4s9aa5btsexwstspmj5ta5a6j 2 NULL NULL
-## organization sequence
-## 1: U.S. Philips Corporation 0
-## 2: Xerox Corporation 0
-## 3: Commonwealth Scientific & Industrial Research Organization 1
-## 4: Canon Kabushiki Kaisha 0
-## 5: Cypress Semiconductor Corp. 0
-## 6: Motorola, Inc. 0
-
head(rawlocation)
-
## id location_id city state country
-## 1: 00008o6jw8bp4c824ihgc1fcz qxm7bum6wvip Sunnyvale CA US
-## 2: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz Seoul NULL KR
-## 3: 0000b606fnwnupiane3dfkoyl lmeg78jbn8nq Baton Rouge LA US
-## 4: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z Osaka NULL JP
-## 5: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w Austin TX US
-## 6: 0000g8ehwj0j8r2eubt3s0sso 25uh68jjdw8z Nundah NULL AU
-## latlong
-## 1: 37.3689|-122.0353
-## 2: 37.5985|126.9783
-## 3: 30.4506|-91.1544
-## 4: 34.6937|135.5022
-## 5: 30.2669|-97.7428
-## 6: -27.4033|153.0603
-
-
-
Join Raw Assignee and Assignee Tables
-
merged_assignees <- merge(rawassignee, assignee, by.x = c('assignee_id', 'type', 'name_first', 'name_last', 'organization'), by.y = c('id','type', 'name_first', 'name_last', 'organization'))
-
colnames(merged_assignees)[colnames(merged_assignees)=="type"] <- "assignee_type"
-
-head(merged_assignees)
-
## assignee_id assignee_type name_first name_last
-## 1: 004bvflbsd8k0pjiga6l1hdm2 2 NULL NULL
-## 2: 00aw1csuxll9yyqi67qq9m91q 2 NULL NULL
-## 3: 00gkqk53hjao0mroltu3oo1qk 2 NULL NULL
-## 4: 00grlqxhvjz0l7ylc5okvvtbg 2 NULL NULL
-## 5: 00iquis3fn9yu6i0n8ojxex4c 2 NULL NULL
-## 6: 00qivxg405hst268eg56psex6 2 NULL NULL
-## organization uuid patent_id
-## 1: GM Global Technology Operations LLC pfqsbx9cud7900bsf30rpqhbn 8499738
-## 2: AT&T Intellectual Property II, L.P. 35u392dttglj2tk1lywtepfkq 7936730
-## 3: McAfee, Inc. q3at72duk25a02ujn35soumry 9148422
-## 4: Sequa Corporation zsrq73v0qr24hjmv3yms09xse 7246474
-## 5: CENTER CROSS ARCHERY LLC go5lrbk3yph01byne9xojx4w3 9683819
-## 6: Exceptional IP Holdings, LLC 3hbckl4pd9cyrqunipb21jobr 9296121
-## rawlocation_id sequence
-## 1: NULL 0
-## 2: NULL 0
-## 3: NULL 0
-## 4: NULL 0
-## 5: NULL 0
-## 6: NULL 0
-
n_non_matches <- nrow(rawassignee) - nrow(merged_assignees)
-
The assignee table contains 486,381 rows while the rawassignee table contains 6,387,373 rows. The merging of these tables results in a table with 5,082,070 rows. Thus there are 1,305,303 rows in the rawassignee table which do not contain matches to the assignee table on the specified columns.
-
-
-
Join Raw Location and Location Tables
-
head(location)
-
## id city state country latitude longitude county
-## 1: 000ii62xlraz Esparto CA US 38.6922 -122.0160 Yolo
-## 2: 000iprvfzu2f Limm NULL GB 11.1000 30.5167 NULL
-## 3: 000ktsnnq96r Lauenburg/Elbe NULL DE 53.3763 10.5497 NULL
-## 4: 000osf8n2ysw Bouxwiller NULL FR 47.5051 7.3453 NULL
-## 5: 0014a0zfh259 Morance NULL FR 45.8978 4.7004 NULL
-## 6: 0018uf8e2x0f Dals Langed NULL SE 58.9167 12.3000 NULL
-## state_fips county_fips
-## 1: 6 6113
-## 2: NULL NULL
-## 3: NULL NULL
-## 4: NULL NULL
-## 5: NULL NULL
-## 6: NULL NULL
-
colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id"
-head(rawlocation)
-
## rawlocation_id location_id city state country
-## 1: 00008o6jw8bp4c824ihgc1fcz qxm7bum6wvip Sunnyvale CA US
-## 2: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz Seoul NULL KR
-## 3: 0000b606fnwnupiane3dfkoyl lmeg78jbn8nq Baton Rouge LA US
-## 4: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z Osaka NULL JP
-## 5: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w Austin TX US
-## 6: 0000g8ehwj0j8r2eubt3s0sso 25uh68jjdw8z Nundah NULL AU
-## latlong
-## 1: 37.3689|-122.0353
-## 2: 37.5985|126.9783
-## 3: 30.4506|-91.1544
-## 4: 34.6937|135.5022
-## 5: 30.2669|-97.7428
-## 6: -27.4033|153.0603
-
merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country'))
-
head(merged_locations)
-
## location_id city state country rawlocation_id
-## 1: 000ii62xlraz Esparto CA US 09g0uz60wjm5ynib3xa28z8bn
-## 2: 000ii62xlraz Esparto CA US 0fyd31upf36smdme7oms1nhzi
-## 3: 000ii62xlraz Esparto CA US 2e4xhj4rjkwvu4gydwyiojzea
-## 4: 000ii62xlraz Esparto CA US 2j0ps8x68j7wsajxzxvsg6txx
-## 5: 000ii62xlraz Esparto CA US 49st0yptqxr9b2ecbp8l3eq97
-## 6: 000ii62xlraz Esparto CA US 4fgmrp96itwtsla9tglzqu0gb
-## latlong latitude longitude county state_fips county_fips
-## 1: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 2: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 3: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 4: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 5: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 6: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-
n_non_matches <- nrow(rawlocation) - nrow(merged_locations)
-
The location table contains 142,188 rows while the rawlocation table contains 27,460,928rows. Merging these tables results in a merged_locations table of 25,410,691 rows. Thus there are 2,050,237 rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table.
-
-
-
Join Merged_Locations and Merged_Assignees
-
merged_assignee_location <- merge(merged_locations, merged_assignees, by.x = 'rawlocation_id', by.y = 'rawlocation_id')
-
colnames(merged_assignee_location)[colnames(merged_assignee_location)=="type"] <- "assignee_type"
-head(merged_assignee_location)
-
## rawlocation_id location_id city state country
-## 1: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz Seoul NULL KR
-## 2: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z Osaka NULL JP
-## 3: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w Austin TX US
-## 4: 00012bpxm3zw9ic3fntkt2y0n 4kqu1ib0ozsk Washington DC US
-## 5: 0001jr4pe4nf6judjbjf7ni7j 4bzg3k4qm96v Carol Stream IL US
-## 6: 0001pjj8tpsgixdh0w2vzkf6l 0yaxi23jdpoo Bath NULL GB
-## latlong latitude longitude county state_fips
-## 1: 37.5985|126.9783 37.5985 126.9780 NULL NULL
-## 2: 34.6937|135.5022 34.6937 135.5020 NULL NULL
-## 3: 30.2669|-97.7428 30.2669 -97.7428 Travis 48
-## 4: 38.895|-77.0367 38.8950 -77.0367 District of Columbia 11
-## 5: 41.9125|-88.1347 41.9125 -88.1347 DuPage 17
-## 6: 51.3794|-2.3656 51.3794 -2.3656 NULL NULL
-## county_fips assignee_id assignee_type name_first name_last
-## 1: NULL org_FRO2wdVwzz55rpoigYJU 3 NULL NULL
-## 2: NULL org_Y0pGhGHfz4S9myhFkrDo 3 NULL NULL
-## 3: 48453 org_OAbJ9MgUqayvWClZtJkP 2 NULL NULL
-## 4: 11001 org_EZcyn2HosNuH0Dg5xTai 6 NULL NULL
-## 5: 17043 org_lcg2WfQGP885kmwEebaG 2 NULL NULL
-## 6: NULL org_1Aaa2HuQFtcHPNU453yR 3 NULL NULL
-## organization
-## 1: Kia Motors Corporation
-## 2: Panasonic Corporation
-## 3: Freescale Semiconductor, Inc.
-## 4: The United States of America as represented by the Secretary of Commerce
-## 5: Maremont Corporation
-## 6: Mindspeed Technologies U.K., Limited
-## uuid patent_id sequence
-## 1: u7whuasvhjt0ogf11wpkgzjqe 10308144 1
-## 2: an1w3xckrtghahpevy3rnhonk 7535814 0
-## 3: qf877chvhg50wvjl46nfwxmeq 7795674 0
-## 4: vrcvz7aqi5jew5669gtz3vgbo 4672851 0
-## 5: 23wrusl8imqex9g1bjj79etja 4460073 0
-## 6: yf2x9phh517ejjfa98iudu97c 8352955 0
-
-
-
Join Assignee, Location, and Patent Data
-
head(patent)
-
## patent_id patent_type number patent_country date
-## 1: 10000000 utility 10000000 US 2018-06-19
-## 2: 10000001 utility 10000001 US 2018-06-19
-## 3: 10000002 utility 10000002 US 2018-06-19
-## 4: 10000003 utility 10000003 US 2018-06-19
-## 5: 10000004 utility 10000004 US 2018-06-19
-## 6: 10000005 utility 10000005 US 2018-06-19
-## abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2: The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3: The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4: The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5: The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6: A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-## title
-## 1: Coherent LADAR using intra-pixel quadrature detection
-## 2: Injection molding machine and mold thickness control method
-## 3: Method for manufacturing polymer film and co-extruded film
-## 4: Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6: Article vacuum formation method and vacuum forming apparatus
-## kind num_claims filename withdrawn
-## 1: B2 20 ipg180619.xml NULL
-## 2: B2 12 ipg180619.xml NULL
-## 3: B2 9 ipg180619.xml NULL
-## 4: B2 18 ipg180619.xml NULL
-## 5: B2 6 ipg180619.xml NULL
-## 6: B2 4 ipg180619.xml NULL
-
# free up some memory for final merge
-rm(rawassignee)
-rm(rawlocation)
-rm(assignee)
-rm(location)
-rm(merged_locations)
-
-complete_merge <- merge(patent, merged_assignee_location, by.x = 'patent_id', by.y = 'patent_id' )
-
head(complete_merge)
-
## patent_id patent_type number patent_country date
-## 1: 10000000 utility 10000000 US 2018-06-19
-## 2: 10000004 utility 10000004 US 2018-06-19
-## 3: 10000007 utility 10000007 US 2018-06-19
-## 4: 10000008 utility 10000008 US 2018-06-19
-## 5: 10000010 utility 10000010 US 2018-06-19
-## 6: 10000011 utility 10000011 US 2018-06-19
-## abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2: The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 3: An expanding tool comprising: an actuator comprising a cylindrical housing that defines an actuator housing cavity; a primary ram disposed within the actuator housing cavity, the primary ram defining an internal primary ram cavity; a secondary ram disposed within the internal primary ram cavity; a cam roller carrier coupled to a distal end of the secondary ram; a drive collar positioned within a distal end of the actuator housing cavity; a roller clutch disposed within an internal cavity defined by an inner surface of the drive collar; a shuttle cam positioned between the roller clutch and a distal end of the primary ram; an expander cone coupled to the primary ram; and an expander head operably coupled to the drive collar.
-## 4: A decorated strip of coated, heat-shrinkable, plastic sheet material is placed in a spiral slot formed in a silicone rubber mold. The spiral slot is defined by a spiral wall having a uniform wall thickness. Upon heating in an oven, the material shrinks, forming a resiliently expansible arc-shaped band that can be worn as a bracelet or wristband.
-## 5: 3-D printing system include development stations positioned to electrostatically transfer build and support materials to an intermediate transfer surface, a transfer station adjacent the intermediate transfer surface, guides adjacent the transfer station, and platens moving on the guides. The guides are shaped to direct the platens to repeatedly pass the transfer station and come in contact with the intermediate transfer surface at the transfer station. The intermediate transfer surface transfers a layer of the build and support materials to the platens each time the platens contact the intermediate transfer surface at the transfer station to successively form layers of the build and support materials on the platens. The platens and the intermediate transfer surface include rack and pinion structures that temporarily join at the transfer station, as the platens pass the transfer station, to align the platens with the intermediate transfer surface as the platens contact the intermediate transfer surface.
-## 6: To reduce distortion in an additively manufactured part, a shrinking platform is formed from a metal particulate filler in a debindable matrix. Shrinking supports of the same material are formed above the shrinking platform, and a desired part of the same material is formed upon them. A sliding release layer is provided below the shrinking platform of equal or larger surface area than a bottom of the shrinking platform to lateral resistance between the shrinking platform and an underlying surface. The matrix is debound sufficient to form a shape-retaining brown part assembly including the shrinking platform, shrinking supports, and the desired part. The shape-retaining brown part assembly is heated to shrink all of the components together at a same rate via atomic diffusion.
-## title
-## 1: Coherent LADAR using intra-pixel quadrature detection
-## 2: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 3: PEX expanding tool
-## 4: Bracelet mold and method of use
-## 5: 3-D electrostatic printer using rack and pinion registration system
-## 6: Supports for sintering additively manufactured parts
-## kind num_claims filename withdrawn rawlocation_id
-## 1: B2 20 ipg180619.xml NULL r7ep1i9cnoxp5af0i8in6li7s
-## 2: B2 6 ipg180619.xml NULL 6rehm9medwxuoreqbg7ded05v
-## 3: B2 24 ipg180619.xml NULL pcmrojq59hh42w1zx55c4vtnq
-## 4: B2 11 ipg180619.xml NULL jf04zgy27bbhss9zf7du9t13x
-## 5: B2 20 ipg180619.xml NULL 2xq5n6vql58bj2imf6qntrxas
-## 6: B1 30 ipg180619.xml NULL 85q4hydnb4k5nsztw1ldr5s6o
-## location_id city state country latlong latitude
-## 1: sz07iwmerndr Waltham MA US 42.3764|-71.2361 42.3764
-## 2: 7xx3c5j38uy4 Monterrey NULL MX 25.6866|-100.3161 25.6866
-## 3: 3kfz8rbattyy Brookfield WI US 43.0606|-88.1064 43.0606
-## 4: tm5n0nkk6vui Northvale NJ US 41.0064|-73.9494 41.0064
-## 5: 41x16fll6h13 Norwalk CT US 41.1175|-73.4083 41.1175
-## 6: 0gzn02sfdkgl Watertown MA US 42.3708|-71.1833 42.3708
-## longitude county state_fips county_fips assignee_id
-## 1: -71.2361 Middlesex 25 25017 org_AARE0vwmqWJVw3VEQhqO
-## 2: -100.3160 NULL NULL NULL org_i7HMMn3SSFuW0jGC8dZr
-## 3: -88.1064 Waukesha 55 55133 org_il88HucVp82DFlrSiqHF
-## 4: -73.9494 Bergen 34 34003 org_dltfa8HjhhcG1eN1Af3J
-## 5: -73.4083 Fairfield 9 9001 org_UrbE3xev7LUsnuvRjbep
-## 6: -71.1833 Middlesex 25 25017 org_Hg5ThdGNeUqwAVSISglk
-## assignee_type name_first name_last organization
-## 1: 2 NULL NULL Raytheon Company
-## 2: 3 NULL NULL ZUBEX INDUSTRIAL SA DE CV
-## 3: 2 NULL NULL Milwaukee Electric Tool Corporation
-## 4: 2 NULL NULL Alex Toys, LLC
-## 5: 2 NULL NULL Xerox Corporation
-## 6: 2 NULL NULL MARKFORGED, INC.
-## uuid sequence
-## 1: vtus5eb4i7ebzleux3c8qfuir 0
-## 2: unykdm4rllult7dlswivtojfo 0
-## 3: de8maqgwv2aheidnovnd95njr 0
-## 4: 9jbbya3cdigrjtqxkjjl39s4y 0
-## 5: grm825lxh91ko3n1feva20zqr 0
-## 6: 1h5v4oycegaoh7dp95itxi6jg 0
-
The merged_assignee_location table contains 4,680,039 rows and the patent table contains 7,144,425. The complete_merge contains 4,680,039 rows. Thus, all of the patent_ids in the merged_assignee_location table have matches in the patent table.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/01_bulk_download_example_joins/join_inventor.Rmd b/01_bulk_download_example_joins/join_inventor.Rmd
deleted file mode 100644
index 4471a7d..0000000
--- a/01_bulk_download_example_joins/join_inventor.Rmd
+++ /dev/null
@@ -1,136 +0,0 @@
----
-title: "Merge Inventor, Location, and Patent Tables"
-output: html_document
----
-
-#### Note: This HTML document was last generated on `r Sys.Date()`
-
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_knit$set(root.dir = "TODO: Add Directory Path Here")
-
-library(data.table)
-library(scales)
-```
-
-## Load Patent Table
-```{r, warning=FALSE}
-patent <- fread("patent.tsv")
-colnames(patent)[colnames(patent)=="type"] <- "patent_type"
-colnames(patent)[colnames(patent)=="country"] <- "patent_country"
-colnames(patent)[colnames(patent)=="id"] <- "patent_id"
-head(patent)
-
-```
-
-## Load Inventor and Location Tables
-```{r, warning=FALSE}
-
-inventor <- fread("inventor.tsv")
-location <- fread("location.tsv")
-
-```
-
-```{r}
-head(inventor)
-head(location)
-
-```
-
-## Load Raw Inventor and Raw Location Tables
-```{r, warning=FALSE}
-rawinventor <- fread("rawinventor.tsv")
-rawlocation <- fread("rawlocation.tsv")
-
-```
-
-```{r}
-head(rawinventor)
-colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id"
-head(rawlocation)
-
-```
-
-## Join Raw Location and Location Tables
-```{r}
-merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country'))
-```
-
-```{r}
-head(merged_locations)
-
-n_non_matches <- nrow(rawlocation) - nrow(merged_locations)
-```
-
-
-The location table contains `r comma_format()(nrow(location))` rows while the rawlocation table contains `r comma_format()(nrow(rawlocation))` rows. Merging these tables results in a merged_locations table of `r comma_format()(nrow(merged_locations))` rows. Thus there are `r comma_format()(n_non_matches)` rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table.
-
-## Join Raw Inventor and Inventor Tables
-```{r}
-# free up some memory
-rm(location)
-rm(rawlocation)
-
-```
-
-```{r}
-head(inventor)
-```
-
-```{r}
-head(rawinventor)
-```
-
-```{r}
-merged_inventors <- merge(rawinventor, inventor, by.x = c('inventor_id', 'name_first', 'name_last'), by.y = c('id', 'name_first', 'name_last'))
-```
-
-```{r}
-
-head(merged_inventors)
-n_non_matches <- nrow(rawinventor) - nrow(merged_inventors)
-```
-
-The inventor table contains `r comma_format()(nrow(inventor))` rows while the rawinventor table contains `r comma_format()(nrow(rawinventor))` rows. The resulting merged_inventors table contains `r comma_format()(nrow(merged_inventors))` rows. Thus there are `r comma_format()(n_non_matches)` rows in the rawinventor table which do not contain matching inventor_ids, first names, and last names in the inventor table.
-
-## Join Merged_Inventors and Merged_Locations
-```{r}
-# free up some memory
-rm(inventor)
-rm(rawinventor)
-```
-
-```{r}
-merged_inventor_location <- merge(merged_inventors, merged_locations, by.x = 'rawlocation_id', by.y = 'rawlocation_id')
-```
-
-
-```{r}
-head(merged_inventor_location)
-n_non_matches <- nrow(merged_locations) - nrow(merged_inventor_location)
-```
-
-The merged_locations table contains `r comma_format()(nrow(merged_locations))` rows, while the merged_inventors table contains `r comma_format()(nrow(merged_inventors))` rows. The resulting merge of these to tables contains `r comma_format()(nrow(merged_inventor_location))` rows. Thus, there are `r comma_format()(n_non_matches)` rows in the merged_locations table that are not matched by rawlocation_id in the merged_inventors table.
-
-```{r}
-rm(merged_inventors)
-rm(merged_locations)
-
-```
-
-
-```{r}
-head(patent)
-```
-
-## Join Inventor, Location, and Patent Data
-```{r}
-complete_merge <- merge(patent, merged_inventor_location, by.x = c('patent_id'), by.y = c('patent_id'))
-```
-
-```{r}
-head(complete_merge)
-```
-
-The merged_inventor_location table contains `r comma_format()(nrow(merged_inventor_location))` rows and the patent table contains `r comma_format()(nrow(patent))` rows. The complete_merge contains `r comma_format()(nrow(complete_merge))` rows. Thus, all of the patent_ids in the merged_inventor_location table have matches in the patent table.
\ No newline at end of file
diff --git a/01_bulk_download_example_joins/join_inventor.html b/01_bulk_download_example_joins/join_inventor.html
deleted file mode 100644
index c583022..0000000
--- a/01_bulk_download_example_joins/join_inventor.html
+++ /dev/null
@@ -1,530 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Note: This HTML document was last generated on 2020-01-14
-
-
-
Load Patent Table
-
patent <- fread("patent.tsv")
-colnames(patent)[colnames(patent)=="type"] <- "patent_type"
-colnames(patent)[colnames(patent)=="country"] <- "patent_country"
-colnames(patent)[colnames(patent)=="id"] <- "patent_id"
-head(patent)
-
## patent_id patent_type number patent_country date
-## 1: 10000000 utility 10000000 US 2018-06-19
-## 2: 10000001 utility 10000001 US 2018-06-19
-## 3: 10000002 utility 10000002 US 2018-06-19
-## 4: 10000003 utility 10000003 US 2018-06-19
-## 5: 10000004 utility 10000004 US 2018-06-19
-## 6: 10000005 utility 10000005 US 2018-06-19
-## abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2: The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3: The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4: The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5: The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6: A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-## title
-## 1: Coherent LADAR using intra-pixel quadrature detection
-## 2: Injection molding machine and mold thickness control method
-## 3: Method for manufacturing polymer film and co-extruded film
-## 4: Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6: Article vacuum formation method and vacuum forming apparatus
-## kind num_claims filename withdrawn
-## 1: B2 20 ipg180619.xml NULL
-## 2: B2 12 ipg180619.xml NULL
-## 3: B2 9 ipg180619.xml NULL
-## 4: B2 18 ipg180619.xml NULL
-## 5: B2 6 ipg180619.xml NULL
-## 6: B2 4 ipg180619.xml NULL
-
-
-
Load Inventor and Location Tables
-
inventor <- fread("inventor.tsv")
-location <- fread("location.tsv")
-
head(inventor)
-
## id name_first name_last
-## 1: 10000001-2 Hyeon-Jae Yu
-## 2: 10000004-1 Miguel Jorge Zubiria Elizondo
-## 3: 10000004-2 Jose Juan Valadez Lopez
-## 4: 10000005-2 Katsunori Oda
-## 5: 10000006-1 Marc Saelen
-## 6: 10000014-2 Tim Diehlmann
-
head(location)
-
## id city state country latitude longitude county
-## 1: 000ii62xlraz Esparto CA US 38.6922 -122.0160 Yolo
-## 2: 000iprvfzu2f Limm NULL GB 11.1000 30.5167 NULL
-## 3: 000ktsnnq96r Lauenburg/Elbe NULL DE 53.3763 10.5497 NULL
-## 4: 000osf8n2ysw Bouxwiller NULL FR 47.5051 7.3453 NULL
-## 5: 0014a0zfh259 Morance NULL FR 45.8978 4.7004 NULL
-## 6: 0018uf8e2x0f Dals Langed NULL SE 58.9167 12.3000 NULL
-## state_fips county_fips
-## 1: 6 6113
-## 2: NULL NULL
-## 3: NULL NULL
-## 4: NULL NULL
-## 5: NULL NULL
-## 6: NULL NULL
-
-
-
Load Raw Inventor and Raw Location Tables
-
rawinventor <- fread("rawinventor.tsv")
-rawlocation <- fread("rawlocation.tsv")
-
head(rawinventor)
-
## uuid patent_id inventor_id
-## 1: 0000hccb98m2kc6g1v7128k5w 7646155 4341225-2
-## 2: 0000kwt5abwdu9f4av6zoa61t 4339721 4339721-2
-## 3: 0000n6xqianutadbzbgzwled7 6610738 6610738-6
-## 4: 0000n8nqsxhrztn7djlxou00k 6448562 6448562-2
-## 5: 0000p6jf5l8yzv04wimaoabab 4432679 4127345-1
-## 6: 0000tgs7dqvzrnfiad3b9fr06 8977851 6424872-10
-## rawlocation_id name_first name_last sequence rule_47
-## 1: omi6wqlrblholsssk9qx0dz5b Samuel G Woods 0
-## 2: 8uifwkdu885g2jxfu2uhnzgp1 Takuya Hosoda 1
-## 3: hxh83mos96occibi6wg9fzfuy Eva K. Mudrn 5
-## 4: cbejrj9ius2mty4ig27zqvj99 Muamer Zukic 1
-## 5: k7k03qpqzsoobzg1rgsr83q2e Donald J. Angelosanto 0
-## 6: ixb2b02fc6yq2q7fuj6hnycie Lee Allen Neitzel 0
-## deceased
-## 1: NULL
-## 2: NULL
-## 3: NULL
-## 4: NULL
-## 5: NULL
-## 6: NULL
-
colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id"
-head(rawlocation)
-
## rawlocation_id location_id city state country
-## 1: 00008o6jw8bp4c824ihgc1fcz qxm7bum6wvip Sunnyvale CA US
-## 2: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz Seoul NULL KR
-## 3: 0000b606fnwnupiane3dfkoyl lmeg78jbn8nq Baton Rouge LA US
-## 4: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z Osaka NULL JP
-## 5: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w Austin TX US
-## 6: 0000g8ehwj0j8r2eubt3s0sso 25uh68jjdw8z Nundah NULL AU
-## latlong
-## 1: 37.3689|-122.0353
-## 2: 37.5985|126.9783
-## 3: 30.4506|-91.1544
-## 4: 34.6937|135.5022
-## 5: 30.2669|-97.7428
-## 6: -27.4033|153.0603
-
-
-
Join Raw Location and Location Tables
-
merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country'))
-
head(merged_locations)
-
## location_id city state country rawlocation_id
-## 1: 000ii62xlraz Esparto CA US 09g0uz60wjm5ynib3xa28z8bn
-## 2: 000ii62xlraz Esparto CA US 0fyd31upf36smdme7oms1nhzi
-## 3: 000ii62xlraz Esparto CA US 2e4xhj4rjkwvu4gydwyiojzea
-## 4: 000ii62xlraz Esparto CA US 2j0ps8x68j7wsajxzxvsg6txx
-## 5: 000ii62xlraz Esparto CA US 49st0yptqxr9b2ecbp8l3eq97
-## 6: 000ii62xlraz Esparto CA US 4fgmrp96itwtsla9tglzqu0gb
-## latlong latitude longitude county state_fips county_fips
-## 1: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 2: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 3: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 4: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 5: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-## 6: 38.6922|-122.0161 38.6922 -122.016 Yolo 6 6113
-
n_non_matches <- nrow(rawlocation) - nrow(merged_locations)
-
The location table contains 142,188 rows while the rawlocation table contains 27,460,928 rows. Merging these tables results in a merged_locations table of 25,410,691 rows. Thus there are 2,050,237 rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table.
-
-
-
Join Raw Inventor and Inventor Tables
-
# free up some memory
-rm(location)
-rm(rawlocation)
-
head(inventor)
-
## id name_first name_last
-## 1: 10000001-2 Hyeon-Jae Yu
-## 2: 10000004-1 Miguel Jorge Zubiria Elizondo
-## 3: 10000004-2 Jose Juan Valadez Lopez
-## 4: 10000005-2 Katsunori Oda
-## 5: 10000006-1 Marc Saelen
-## 6: 10000014-2 Tim Diehlmann
-
head(rawinventor)
-
## uuid patent_id inventor_id
-## 1: 0000hccb98m2kc6g1v7128k5w 7646155 4341225-2
-## 2: 0000kwt5abwdu9f4av6zoa61t 4339721 4339721-2
-## 3: 0000n6xqianutadbzbgzwled7 6610738 6610738-6
-## 4: 0000n8nqsxhrztn7djlxou00k 6448562 6448562-2
-## 5: 0000p6jf5l8yzv04wimaoabab 4432679 4127345-1
-## 6: 0000tgs7dqvzrnfiad3b9fr06 8977851 6424872-10
-## rawlocation_id name_first name_last sequence rule_47
-## 1: omi6wqlrblholsssk9qx0dz5b Samuel G Woods 0
-## 2: 8uifwkdu885g2jxfu2uhnzgp1 Takuya Hosoda 1
-## 3: hxh83mos96occibi6wg9fzfuy Eva K. Mudrn 5
-## 4: cbejrj9ius2mty4ig27zqvj99 Muamer Zukic 1
-## 5: k7k03qpqzsoobzg1rgsr83q2e Donald J. Angelosanto 0
-## 6: ixb2b02fc6yq2q7fuj6hnycie Lee Allen Neitzel 0
-## deceased
-## 1: NULL
-## 2: NULL
-## 3: NULL
-## 4: NULL
-## 5: NULL
-## 6: NULL
-
merged_inventors <- merge(rawinventor, inventor, by.x = c('inventor_id', 'name_first', 'name_last'), by.y = c('id', 'name_first', 'name_last'))
-
head(merged_inventors)
-
## inventor_id name_first name_last uuid
-## 1: 10000001-2 Hyeon-Jae Yu 5b2w88r3jwbmj2w5xf2u4ty80
-## 2: 10000004-1 Miguel Jorge Zubiria Elizondo luurs3qcxggn1rw211n9kt7gf
-## 3: 10000004-2 Jose Juan Valadez Lopez ycf0b8c5ezlrx4pzhl111ftyt
-## 4: 10000005-2 Katsunori Oda 4lw3e1g2uwntqcy2xgblb1cce
-## 5: 10000006-1 Marc Saelen rwxm1mlf89apoxr91jsaqfk8m
-## 6: 10000006-1 Marc Saelen xu020uor4gzorxepqeewgbbzn
-## patent_id rawlocation_id sequence rule_47 deceased
-## 1: 10000001 zh7bez8b146hfu0buyxaa0v9b 1 NULL
-## 2: 10000004 v8ucm2trnfns3id8krhmw8bty 0 NULL
-## 3: 10000004 mfezp7a3nfcq9r0pcu8qi286z 1 NULL
-## 4: 10000005 wylqo83g0a7zms7x227bjt9vx 1 NULL
-## 5: 10000006 rombrj5kywqys2cmzxk7p2rvc 0 NULL
-## 6: 10343329 zavbf7rou3y1q39qr8bl5rrpy 0 NULL
-
n_non_matches <- nrow(rawinventor) - nrow(merged_inventors)
-
The inventor table contains 3,857,228 rows while the rawinventor table contains 17,165,604 rows. The resulting merged_inventors table contains 14,697,445 rows. Thus there are 2,468,159 rows in the rawinventor table which do not contain matching inventor_ids, first names, and last names in the inventor table.
-
-
-
Join Merged_Inventors and Merged_Locations
-
# free up some memory
-rm(inventor)
-rm(rawinventor)
-
merged_inventor_location <- merge(merged_inventors, merged_locations, by.x = 'rawlocation_id', by.y = 'rawlocation_id')
-
head(merged_inventor_location)
-
## rawlocation_id inventor_id name_first name_last
-## 1: 00008o6jw8bp4c824ihgc1fcz 6362662-2 Guoxing Li
-## 2: 0000b606fnwnupiane3dfkoyl 6080888-2 Kannappan C. Chockalingham
-## 3: 0000iaytxly6w1a2q42efuk5p 4711077-7 Takeo Hattori
-## 4: 0000mzv8qbiyubouljdzswns2 D693064-1 Nicholas H. Dupree
-## 5: 0000nl2head6ga63f9iwoa4w1 3946173-1 Terry M. Haber
-## 6: 0000u1ntofpo3bnr6mv36z9gn 4105471-2 James E. Avery
-## uuid patent_id sequence rule_47 deceased
-## 1: yffjycuutcocumzt2msx2cwb7 8164309 0 NULL
-## 2: s32168nc17evwmldr6z9y6d4z 6080888 1 NULL
-## 3: 2m77qi6vvzfcp4ut11e64ohvs D660122 1 NULL
-## 4: 5uhw2w0slnoc4qiba14r4wnjn D850010 1 NULL
-## 5: 2x85x9q4ejdzifytyd1c9hnx3 5188615 0 NULL
-## 6: zc0j4hdr4l7l6lq0fkv8dq2yz 5942047 4 NULL
-## location_id city state country latlong latitude
-## 1: qxm7bum6wvip Sunnyvale CA US 37.3689|-122.0353 37.3689
-## 2: lmeg78jbn8nq Baton Rouge LA US 30.4506|-91.1544 30.4506
-## 3: ongtod0a98g7 Tokyo NULL JP 35.685|139.7514 35.6850
-## 4: x9z0kf8mxhvf Providence RI US 41.8239|-71.4133 41.8239
-## 5: 9rublg2e2su2 Lake Forest CA US 33.6469|-117.6883 33.6469
-## 6: 2opf1msvy0zz Issaquah WA US 47.5303|-122.0314 47.5303
-## longitude county state_fips county_fips
-## 1: -122.0350 Santa Clara 6 6085
-## 2: -91.1544 East Baton Rouge 22 22033
-## 3: 139.7510 NULL NULL NULL
-## 4: -71.4133 Providence 44 44007
-## 5: -117.6880 Orange 6 6059
-## 6: -122.0310 King 53 53033
-
n_non_matches <- nrow(merged_locations) - nrow(merged_inventor_location)
-
The merged_locations table contains 25,410,691 rows, while the merged_inventors table contains 14,697,445 rows. The resulting merge of these to tables contains 13,625,373 rows. Thus, there are 11,785,318 rows in the merged_locations table that are not matched by rawlocation_id in the merged_inventors table.
-
rm(merged_inventors)
-rm(merged_locations)
-
head(patent)
-
## patent_id patent_type number patent_country date
-## 1: 10000000 utility 10000000 US 2018-06-19
-## 2: 10000001 utility 10000001 US 2018-06-19
-## 3: 10000002 utility 10000002 US 2018-06-19
-## 4: 10000003 utility 10000003 US 2018-06-19
-## 5: 10000004 utility 10000004 US 2018-06-19
-## 6: 10000005 utility 10000005 US 2018-06-19
-## abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2: The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3: The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4: The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5: The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6: A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-## title
-## 1: Coherent LADAR using intra-pixel quadrature detection
-## 2: Injection molding machine and mold thickness control method
-## 3: Method for manufacturing polymer film and co-extruded film
-## 4: Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6: Article vacuum formation method and vacuum forming apparatus
-## kind num_claims filename withdrawn
-## 1: B2 20 ipg180619.xml NULL
-## 2: B2 12 ipg180619.xml NULL
-## 3: B2 9 ipg180619.xml NULL
-## 4: B2 18 ipg180619.xml NULL
-## 5: B2 6 ipg180619.xml NULL
-## 6: B2 4 ipg180619.xml NULL
-
-
-
Join Inventor, Location, and Patent Data
-
complete_merge <- merge(patent, merged_inventor_location, by.x = c('patent_id'), by.y = c('patent_id'))
-
head(complete_merge)
-
## patent_id patent_type number patent_country date
-## 1: 10000001 utility 10000001 US 2018-06-19
-## 2: 10000001 utility 10000001 US 2018-06-19
-## 3: 10000002 utility 10000002 US 2018-06-19
-## 4: 10000003 utility 10000003 US 2018-06-19
-## 5: 10000003 utility 10000003 US 2018-06-19
-## 6: 10000003 utility 10000003 US 2018-06-19
-## abstract
-## 1: The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 2: The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3: The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4: The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5: The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 6: The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## title kind
-## 1: Injection molding machine and mold thickness control method B2
-## 2: Injection molding machine and mold thickness control method B2
-## 3: Method for manufacturing polymer film and co-extruded film B2
-## 4: Method for producing a container from a thermoplastic B2
-## 5: Method for producing a container from a thermoplastic B2
-## 6: Method for producing a container from a thermoplastic B2
-## num_claims filename withdrawn rawlocation_id
-## 1: 12 ipg180619.xml NULL 95igh4mbo217kt9dmb2x8r3il
-## 2: 12 ipg180619.xml NULL zh7bez8b146hfu0buyxaa0v9b
-## 3: 9 ipg180619.xml NULL wq19fdjfax9nrdzk88v66n3hy
-## 4: 18 ipg180619.xml NULL bg0r7733kk1djxew6d0jsfcr8
-## 5: 18 ipg180619.xml NULL c9av9x4gxgkr3bvhc2dtrhsvz
-## 6: 18 ipg180619.xml NULL d0bw7mgqb97j92ibz1xjxmfr7
-## inventor_id name_first name_last uuid sequence
-## 1: 7354823-3 Sun-Woo Lee enlwyeih9u2bjg5io9wvedcdq 0
-## 2: 10000001-2 Hyeon-Jae Yu 5b2w88r3jwbmj2w5xf2u4ty80 1
-## 3: 9833943-2 Dong-Hyeon Choi m5jfzgdhewd0gk2owlju7je84 2
-## 4: 8603280-2 Carsten Elsasser ilemj1o687jzj3wah1if9ubfw 1
-## 5: 8721828-1 Guido Bergmann 6i8q3apa83yhwrxxg04s1988v 0
-## 6: 9776357-4 Cristoph Mehren 81eavybjwpqpb4vcaieg78l8e 2
-## rule_47 deceased location_id city state country
-## 1: NULL ze3x8q3hwgh8 Gunpo-si NULL KR
-## 2: NULL 0e1go7wy364j Ansan-si NULL KR
-## 3: NULL gilg4hcy289v Yongin-si NULL KR
-## 4: NULL fdqcb3ukf6sw Pulheim NULL DE
-## 5: NULL qgidi0xps9o6 St. Augustin NULL DE
-## 6: NULL 9bxbajqqn4wg Konigswinter NULL DE
-## latlong latitude longitude county state_fips county_fips
-## 1: 37.3617|126.9352 37.3617 126.9350 NULL NULL NULL
-## 2: 37.3219|126.8309 37.3219 126.8310 NULL NULL NULL
-## 3: 37.2411|127.1776 37.2411 127.1780 NULL NULL NULL
-## 4: 51.0|6.8 51.0000 6.8000 NULL NULL NULL
-## 5: 50.7554|7.182 50.7554 7.1820 NULL NULL NULL
-## 6: 50.6833|7.1833 50.6833 7.1833 NULL NULL NULL
-
The merged_inventor_location table contains 13,625,373 rows and the patent table contains 7,144,425 rows. The complete_merge contains 13,625,373 rows. Thus, all of the patent_ids in the merged_inventor_location table have matches in the patent table.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/02_claims_examples/Claims Example.ipynb b/02_claims_examples/Claims Example.ipynb
deleted file mode 100644
index 1ace450..0000000
--- a/02_claims_examples/Claims Example.ipynb
+++ /dev/null
@@ -1,207 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Claims Parsing\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:39:04.693196Z",
- "start_time": "2020-05-10T20:39:04.681724Z"
- }
- },
- "outputs": [],
- "source": [
- "import csv\n",
- "pandas_chunksize = 100000\n",
- "delimiter =\"\\t\"\n",
- "filepath = \"claims_2005.tsv\"\n",
- "quote_type =csv.QUOTE_NONNUMERIC"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Using pandas"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:39:06.355920Z",
- "start_time": "2020-05-10T20:39:05.866932Z"
- }
- },
- "outputs": [],
- "source": [
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:39:06.497168Z",
- "start_time": "2020-05-10T20:39:06.490860Z"
- }
- },
- "outputs": [],
- "source": [
- "total_rows = 0 "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:39:31.874577Z",
- "start_time": "2020-05-10T20:39:07.897470Z"
- },
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "claims_chunks = pd.read_csv(\n",
- " filepath,\n",
- " sep=delimiter,\n",
- " chunksize=pandas_chunksize,\n",
- " quoting=quote_type,\n",
- " quotechar='\"')\n",
- "\n",
- "for claim_chunk in claims_chunks:\n",
- " total_rows += claim_chunk.shape[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:39:31.896463Z",
- "start_time": "2020-05-10T20:39:31.880724Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "3628513"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "total_rows"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Using CSV reader"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:39:31.908592Z",
- "start_time": "2020-05-10T20:39:31.901140Z"
- }
- },
- "outputs": [],
- "source": [
- "import csv"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:41:12.864381Z",
- "start_time": "2020-05-10T20:41:12.857797Z"
- }
- },
- "outputs": [],
- "source": [
- "total_rows=0"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:41:33.025448Z",
- "start_time": "2020-05-10T20:41:13.005490Z"
- }
- },
- "outputs": [],
- "source": [
- "with open(filepath) as fp:\n",
- " reader = csv.reader(fp, delimiter=delimiter,quotechar='\"', quoting=quote_type)\n",
- " for csv_row in reader:\n",
- " total_rows+=1"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2020-05-10T20:41:33.034002Z",
- "start_time": "2020-05-10T20:41:33.027644Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "3628514\n"
- ]
- }
- ],
- "source": [
- "print(total_rows)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/02_claims_examples/README.md b/02_claims_examples/README.md
deleted file mode 100644
index 3a67301..0000000
--- a/02_claims_examples/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Claims Parsing Settings
-
-| Setting | Value |
-|-----------------|----------------------------------------------------------------------------------------------------------------------------------------|
-| Delimiter | Tab (\t) |
-| Quote Character | Double Quote (") |
-| Quote Setting | Quote Non Numeric |
-| Python Example | [Example Code on GitHub](https://github.com/CSSIP-AIR/PatentsView-Code-Snippets/blob/master/02_claims_examples/Claims%20Example.ipynb) |
diff --git a/03_bulk_download_read_in/Python Scripts/README.md b/03_bulk_download_read_in/Python Scripts/README.md
deleted file mode 100644
index 0bdda6e..0000000
--- a/03_bulk_download_read_in/Python Scripts/README.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# PatentsView-Code-Snippets
-
-# Bulk Download Files: Python Read-in Scripts
-
-Below is a list of all bulk download files and information on whether or not there is a template read-in script currently available.
-
-If the script for a file is not currently available, other scripts in this repository can be used as reference. The files are all structured in the same manner so you should be able to use a template from a different file to help determine how to proceed.
-
-List of Resources:
-
-All Python Scripts were created using Python 3.7.4
-
-Necessary Packages:
-
-- zipfile
-- pandas
-- os
-
-| Bulk Download File | Status of Script |
-| --- |--- |
-| application | *Available* |
-| assignee | *Available* |
-| botanic | *Available* |
-| brf_sum_text | *In Progress* |
-| claim | *Available* |
-| cpc_current | *Available* |
-| cpc_group | *Available* |
-| cpc_subgroup | *Available* |
-| cpc_subsection | *Available* |
-| draw_desc_text | *In Progress* |
-| detail_desc_text | *In Progress* |
-| figures | *Available* |
-| foreign_priority | *Available* |
-| foreigncitation | *Available* |
-| government_interest | *Available* |
-| government_organization | *Available* |
-| inventor | *Available* |
-| inventor_gender | *Available* |
-| ipcr | *Available* |
-| lawyer | *Available* |
-| location | *Available* |
-| location_assignee | *Available* |
-| location_inventor | *In Progress* |
-| mainclass | *Available* |
-| mainclass_current | *Available* |
-| nber | *Available* |
-| nber_category | *Available* |
-| nber_subcategory | *Available* |
-| non_inventor_applicant | *Available* |
-| otherreference | *Available* |
-| patent | *Available* |
-| patent_assignee | *In Progress* |
-| patent_contractawardnumber | *Available* |
-| patent_govintorg | *Available* |
-| patent_inventor | *Available* |
-| patent_lawyer | *Available* |
-| pct_data | *Available* |
-| persistent_assignee_disambig | *Available* |
-| persistent_inventor_disambig | *Available* |
-| rawassignee | *Available* |
-| rawexaminer | *Available* |
-| rawinventor | *Available* |
-| rawlawyer | *Available* |
-| rawlocation | *Available* |
-| rel_app_text | *Available* |
-| subclass | *Available* |
-| subclass_current | *Available* |
-| us_term_of_grant | *Available* |
-| usapplicationcitation | *Available* |
-| uspatentcitation | *Available* |
-| uspc | *Available* |
-| uspc_current | *Available* |
-| usreldoc | *Available* |
-| wipo | *Available* |
-| wipo_field | *Available* |
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/application.py b/03_bulk_download_read_in/Python Scripts/archive/application.py
deleted file mode 100644
index 5a57ff4..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/application.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "application.tsv.zip"
-f_name = "application.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/assignee.py b/03_bulk_download_read_in/Python Scripts/archive/assignee.py
deleted file mode 100644
index 09fbe77..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/assignee.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "assignee.tsv.zip"
-f_name = "assignee.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/botanic.py b/03_bulk_download_read_in/Python Scripts/archive/botanic.py
deleted file mode 100644
index 77b202a..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/botanic.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "botanic.tsv.zip"
-f_name = "botanic.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-dtype={'sequence': int}
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1976.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1976.py
deleted file mode 100644
index 1333bd7..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1976.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1976 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1976.tsv.zip"
-f_name = "claims_1976.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1977.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1977.py
deleted file mode 100644
index 489e7d0..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1977.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for 1977 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1977.tsv.zip"
-f_name = "claims_1977.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1978.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1978.py
deleted file mode 100644
index a7fe6b0..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1978.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1978 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1978.tsv.zip"
-f_name = "claims_1978.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1979.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1979.py
deleted file mode 100644
index e39a4cd..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1979.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1979 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1979.tsv.zip"
-f_name = "claims_1979.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1980.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1980.py
deleted file mode 100644
index 8cdbef2..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1980.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for 1980 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1980.tsv.zip"
-f_name = "claims_1980.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1981.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1981.py
deleted file mode 100644
index c2e279c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1981.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1981 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1981.tsv.zip"
-f_name = "claims_1981.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1982.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1982.py
deleted file mode 100644
index 91e2645..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1982.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1982 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1982.tsv.zip"
-f_name = "claims_1982.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1983.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1983.py
deleted file mode 100644
index bd5959b..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1983.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1983 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1983.tsv.zip"
-f_name = "claims_1983.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1984.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1984.py
deleted file mode 100644
index b9352f0..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1984.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1984 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1984.tsv.zip"
-f_name = "claims_1984.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1985.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1985.py
deleted file mode 100644
index 3336118..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1985.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1985 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1985.tsv.zip"
-f_name = "claims_1985.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1986.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1986.py
deleted file mode 100644
index 8aac0e5..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1986.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for 1986 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1986.tsv.zip"
-f_name = "claims_1986.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1987.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1987.py
deleted file mode 100644
index e462adc..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1987.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1987 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1987.tsv.zip"
-f_name = "claims_1987.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1988.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1988.py
deleted file mode 100644
index dd808fe..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1988.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1988 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1988.tsv.zip"
-f_name = "claims_1988.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1989.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1989.py
deleted file mode 100644
index ee4209e..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1989.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1989 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1989.tsv.zip"
-f_name = "claims_1989.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1990.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1990.py
deleted file mode 100644
index 7ecf193..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1990.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1990 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1990.tsv.zip"
-f_name = "claims_1990.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1991.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1991.py
deleted file mode 100644
index 0bceb46..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1991.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1991 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1991.tsv.zip"
-f_name = "claims_1991.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1992.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1992.py
deleted file mode 100644
index c27c0d6..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1992.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1992 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1992.tsv.zip"
-f_name = "claims_1992.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1993.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1993.py
deleted file mode 100644
index 39749cb..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1993.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1993 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1993.tsv.zip"
-f_name = "claims_1993.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1994.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1994.py
deleted file mode 100644
index 388933c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1994.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1994 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1994.tsv.zip"
-f_name = "claims_1994.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1995.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1995.py
deleted file mode 100644
index 797fa2e..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1995.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1995 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1995.tsv.zip"
-f_name = "claims_1995.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1996.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1996.py
deleted file mode 100644
index d6dafb5..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1996.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1996 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1996.tsv.zip"
-f_name = "claims_1996.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1997.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1997.py
deleted file mode 100644
index eb4eb41..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1997.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1997 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1997.tsv.zip"
-f_name = "claims_1997.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1998.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1998.py
deleted file mode 100644
index 5c1f6c5..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1998.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1998 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1998.tsv.zip"
-f_name = "claims_1998.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1999.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1999.py
deleted file mode 100644
index bacab79..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1999.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 1999 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_1999.tsv.zip"
-f_name = "claims_1999.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2000.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2000.py
deleted file mode 100644
index 59f4794..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2000.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2000 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2000.tsv.zip"
-f_name = "claims_2000.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2001.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2001.py
deleted file mode 100644
index d50944b..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2001.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2001 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2001.tsv.zip"
-f_name = "claims_2001.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2002.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2002.py
deleted file mode 100644
index 1a50dca..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2002.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2002 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2002.tsv.zip"
-f_name = "claims_2002.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2003.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2003.py
deleted file mode 100644
index 64a2ebe..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2003.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2003 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2003.tsv.zip"
-f_name = "claims_2003.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2004.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2004.py
deleted file mode 100644
index 116e3d5..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2004.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2004 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2004.tsv.zip"
-f_name = "claims_2004.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2005.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2005.py
deleted file mode 100644
index 23bc47d..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2005.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2005 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2005.tsv.zip"
-f_name = "claims_2005.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2006.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2006.py
deleted file mode 100644
index 170b70c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2006.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2006 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2006.tsv.zip"
-f_name = "claims_2006.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2007.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2007.py
deleted file mode 100644
index 26e0f31..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2007.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2007 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2007.tsv.zip"
-f_name = "claims_2007.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2008.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2008.py
deleted file mode 100644
index be148e1..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2008.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2008 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2008.tsv.zip"
-f_name = "claims_2008.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2009.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2009.py
deleted file mode 100644
index 0c815a8..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2009.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2009 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2009.tsv.zip"
-f_name = "claims_2009.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2010.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2010.py
deleted file mode 100644
index 225e406..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2010.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2010 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2010.tsv.zip"
-f_name = "claims_2010.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2011.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2011.py
deleted file mode 100644
index 7caaf7b..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2011.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2011 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2011.tsv.zip"
-f_name = "claims_2011.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2012.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2012.py
deleted file mode 100644
index cf7c65c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2012.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2012 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2012.tsv.zip"
-f_name = "claims_2012.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2013.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2013.py
deleted file mode 100644
index 30c9839..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2013.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2013 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2013.tsv.zip"
-f_name = "claims_2013.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2014.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2014.py
deleted file mode 100644
index a7d6faf..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2014.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2014 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2014.tsv.zip"
-f_name = "claims_2014.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2015.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2015.py
deleted file mode 100644
index ed368a8..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2015.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2015 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2015.tsv.zip"
-f_name = "claims_2015.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2016.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2016.py
deleted file mode 100644
index a0f6009..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2016.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2016 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2016.tsv.zip"
-f_name = "claims_2016.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2017.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2017.py
deleted file mode 100644
index 40e8fe1..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2017.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2017 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2017.tsv.zip"
-f_name = "claims_2017.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2018.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2018.py
deleted file mode 100644
index e9901fd..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2018.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2018 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2018.tsv.zip"
-f_name = "claims_2018.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2019.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2019.py
deleted file mode 100644
index f4cf4b0..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2019.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2019 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2019.tsv.zip"
-f_name = "claims_2019.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2020.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2020.py
deleted file mode 100644
index ffaf535..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2020.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for 2020 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "claims_2020.tsv.zip"
-f_name = "claims_2020.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_current.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_current.py
deleted file mode 100644
index 234de57..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/cpc_current.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "cpc_current.tsv.zip"
-f_name = "cpc_current.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_group.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_group.py
deleted file mode 100644
index 0085ba9..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/cpc_group.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "cpc_group.tsv.zip"
-f_name = "cpc_group.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_subgroup.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_subgroup.py
deleted file mode 100644
index fcdc590..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/cpc_subgroup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "cpc_subgroup.tsv.zip"
-f_name = "cpc_subgroup.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_subsection.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_subsection.py
deleted file mode 100644
index 045366a..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/cpc_subsection.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "cpc_subsection.tsv.zip"
-f_name = "cpc_subsection.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/figures.py b/03_bulk_download_read_in/Python Scripts/archive/figures.py
deleted file mode 100644
index ed62a0a..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/figures.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "figures.tsv.zip"
-f_name = "figures.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(include='float64'))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/foreign_priority.py b/03_bulk_download_read_in/Python Scripts/archive/foreign_priority.py
deleted file mode 100644
index 90b407c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/foreign_priority.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Foreign priority data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("//Dc1fs/dc1ehd/share/Science Policy Portfolio/PatentsView IV/Documentation/Tables/20200331")
-file_name = "foreign_priority.tsv.zip"
-f_name = "foreign_priority.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/foreigncitation.py b/03_bulk_download_read_in/Python Scripts/archive/foreigncitation.py
deleted file mode 100644
index a444d48..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/foreigncitation.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Citations made to foreign patents by US patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "foreigncitation.tsv.zip"
-f_name = "foreigncitation.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/government_interest.py b/03_bulk_download_read_in/Python Scripts/archive/government_interest.py
deleted file mode 100644
index 5ed4347..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/government_interest.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw government interest statements on all patents (where available)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "government_interest.tsv.zip"
-f_name = "government_interest.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/government_organization.py b/03_bulk_download_read_in/Python Scripts/archive/government_organization.py
deleted file mode 100644
index 84e270d..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/government_organization.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Organization names and related agency hierarchy parsed from the government interest statements on all patents (where available)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "government_organization.tsv.zip"
-f_name = "government_organization.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/inventor.py b/03_bulk_download_read_in/Python Scripts/archive/inventor.py
deleted file mode 100644
index c06fb90..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/inventor.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Disambiguated inventor data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "inventor.tsv.zip"
-f_name = "inventor.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/inventor_gender.py b/03_bulk_download_read_in/Python Scripts/archive/inventor_gender.py
deleted file mode 100644
index 7d0cb61..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/inventor_gender.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "inventor_gender.tsv.zip"
-f_name = "inventor_gender.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/ipcr.py b/03_bulk_download_read_in/Python Scripts/archive/ipcr.py
deleted file mode 100644
index 01d8c5d..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/ipcr.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for International Patent Classification data for all patents (as of publication date)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "ipcr.tsv.zip"
-f_name = "ipcr.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/lawyer.py b/03_bulk_download_read_in/Python Scripts/archive/lawyer.py
deleted file mode 100644
index b68f159..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/lawyer.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Disambiguated lawyer data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "lawyer.tsv.zip"
-f_name = "lawyer.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/location.py b/03_bulk_download_read_in/Python Scripts/archive/location.py
deleted file mode 100644
index 522768c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/location.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Disambiguated location data, including latitude and longitude
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "location.tsv.zip"
-f_name = "location.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/location_assignee.py b/03_bulk_download_read_in/Python Scripts/archive/location_assignee.py
deleted file mode 100644
index dec8c20..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/location_assignee.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Metadata table for many-to-many relationships
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "location_assignee.tsv.zip"
-f_name = "location_assignee.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/mainclass.py b/03_bulk_download_read_in/Python Scripts/archive/mainclass.py
deleted file mode 100644
index 647085c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/mainclass.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "mainclass.tsv.zip"
-f_name = "mainclass.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/mainclass_current.py b/03_bulk_download_read_in/Python Scripts/archive/mainclass_current.py
deleted file mode 100644
index 6414842..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/mainclass_current.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "mainclass_current.tsv.zip"
-f_name = "mainclass_current.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/nber.py b/03_bulk_download_read_in/Python Scripts/archive/nber.py
deleted file mode 100644
index 4044f8f..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/nber.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "nber.tsv.zip"
-f_name = "nber.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/nber_category.py b/03_bulk_download_read_in/Python Scripts/archive/nber_category.py
deleted file mode 100644
index 7ed96b0..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/nber_category.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "nber_category.tsv.zip"
-f_name = "nber_category.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/nber_subcategory.py b/03_bulk_download_read_in/Python Scripts/archive/nber_subcategory.py
deleted file mode 100644
index c641240..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/nber_subcategory.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "nber_subcategory.tsv.zip"
-f_name = "nber_subcategory.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/non_inventor_applicant.py b/03_bulk_download_read_in/Python Scripts/archive/non_inventor_applicant.py
deleted file mode 100644
index 5e7e7ad..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/non_inventor_applicant.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "non_inventor_applicant.tsv.zip"
-f_name = "non_inventor_applicant.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/otherreference.py b/03_bulk_download_read_in/Python Scripts/archive/otherreference.py
deleted file mode 100644
index d45f8e2..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/otherreference.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "otherreference.tsv.zip"
-f_name = "otherreference.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-dtype={'sequence': int}
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent.py b/03_bulk_download_read_in/Python Scripts/archive/patent.py
deleted file mode 100644
index 6f31ed7..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/patent.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "patent.tsv.zip"
-f_name = "patent.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-dtype={'sequence': int}
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_contractawardnumber.py b/03_bulk_download_read_in/Python Scripts/archive/patent_contractawardnumber.py
deleted file mode 100644
index 8549df3..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/patent_contractawardnumber.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Contract or award numbers parsed from the government interest statements on all patents (where available)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "patent_contractawardnumber.tsv.zip"
-f_name = "patent_contractawardnumber.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_govintorg.py b/03_bulk_download_read_in/Python Scripts/archive/patent_govintorg.py
deleted file mode 100644
index 687ce95..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/patent_govintorg.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Metadata table with patent-to-organization relationships linked to the government_organization table
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "patent_govintorg.tsv.zip"
-f_name = "patent_govintorg.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_inventor.py b/03_bulk_download_read_in/Python Scripts/archive/patent_inventor.py
deleted file mode 100644
index f3f91a8..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/patent_inventor.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Metadata table for many-to-many relationships
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "patent_inventor.tsv.zip"
-f_name = "patent_inventor.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_lawyer.py b/03_bulk_download_read_in/Python Scripts/archive/patent_lawyer.py
deleted file mode 100644
index 086a5ac..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/patent_lawyer.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for Metadata table for many-to-many relationships
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "patent_lawyer.tsv.zip"
-f_name = "patent_lawyer.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/pct_data.py b/03_bulk_download_read_in/Python Scripts/archive/pct_data.py
deleted file mode 100644
index 59d6a69..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/pct_data.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for PCT data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "pct_data.tsv.zip"
-f_name = "pct_data.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/persistent_assignee_disambig.py b/03_bulk_download_read_in/Python Scripts/archive/persistent_assignee_disambig.py
deleted file mode 100644
index 01aa9f0..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/persistent_assignee_disambig.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Persistant Assignee Disambiguation
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "persistent_assignee_disambig.tsv.zip"
-f_name = "persistent_assignee_disambig.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of columns, observations, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/persistent_inventor_disambig.py b/03_bulk_download_read_in/Python Scripts/archive/persistent_inventor_disambig.py
deleted file mode 100644
index 36124e9..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/persistent_inventor_disambig.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Persistant Inventor Disambiguation
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "persistent_inventor_disambig.tsv.zip"
-f_name = "persistent_inventor_disambig.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawassignee.py b/03_bulk_download_read_in/Python Scripts/archive/rawassignee.py
deleted file mode 100644
index 90b9ff4..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/rawassignee.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw assignee information as it appears in the source text and XML files
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "rawassignee.tsv.zip"
-f_name = "rawassignee.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawexaminer.py b/03_bulk_download_read_in/Python Scripts/archive/rawexaminer.py
deleted file mode 100644
index 3ddfc53..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/rawexaminer.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw examiner information
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "rawexaminer.tsv.zip"
-f_name = "rawexaminer.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawinventor.py b/03_bulk_download_read_in/Python Scripts/archive/rawinventor.py
deleted file mode 100644
index 32bba30..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/rawinventor.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw inventor information as it appears in the source text and XML files
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "rawinventor.tsv.zip"
-f_name = "rawinventor.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawlawyer.py b/03_bulk_download_read_in/Python Scripts/archive/rawlawyer.py
deleted file mode 100644
index 43f7b7b..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/rawlawyer.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "rawlawyer.tsv.zip"
-f_name = "rawlawyer.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawlocation.py b/03_bulk_download_read_in/Python Scripts/archive/rawlocation.py
deleted file mode 100644
index 89b1f17..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/rawlocation.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "rawlocation.tsv.zip"
-f_name = "rawlocation.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-dtype={'sequence': int}
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting= csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/archive/rel_app_text.py b/03_bulk_download_read_in/Python Scripts/archive/rel_app_text.py
deleted file mode 100644
index 9fe0cc4..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/rel_app_text.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "rel_app_text.tsv.zip"
-f_name = "rel_app_text.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/subclass.py b/03_bulk_download_read_in/Python Scripts/archive/subclass.py
deleted file mode 100644
index 07603dd..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/subclass.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "subclass.tsv.zip"
-f_name = "subclass.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/subclass_current.py b/03_bulk_download_read_in/Python Scripts/archive/subclass_current.py
deleted file mode 100644
index 1b37cc0..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/subclass_current.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "subclass_current.tsv.zip"
-f_name = "subclass_current.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/archive/us_term_of_grant.py b/03_bulk_download_read_in/Python Scripts/archive/us_term_of_grant.py
deleted file mode 100644
index 38d86cb..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/us_term_of_grant.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for U.S. term of grant data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "us_term_of_grant.tsv.zip"
-f_name = "us_term_of_grant.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/usapplicationcitation.py b/03_bulk_download_read_in/Python Scripts/archive/usapplicationcitation.py
deleted file mode 100644
index f1dd96c..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/usapplicationcitation.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Citations made to US patent applications by US patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "usapplicationcitation.tsv.zip"
-f_name = "usapplicationcitation.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/uspatentcitation.py b/03_bulk_download_read_in/Python Scripts/archive/uspatentcitation.py
deleted file mode 100644
index 5272086..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/uspatentcitation.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Citations made to US granted patents by US patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "uspatentcitation.tsv.zip"
-f_name = "uspatentcitation.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 2*(10 ** 6)
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/uspc.py b/03_bulk_download_read_in/Python Scripts/archive/uspc.py
deleted file mode 100644
index 7656429..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/uspc.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for USPC classification data for all patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "uspc.tsv.zip"
-f_name = "uspc.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/uspc_current.py b/03_bulk_download_read_in/Python Scripts/archive/uspc_current.py
deleted file mode 100644
index 3847dae..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/uspc_current.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Current USPC classification data for all patents up to May 2015
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "uspc_current.tsv.zip"
-f_name = "uspc_current.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/archive/usreldoc.py b/03_bulk_download_read_in/Python Scripts/archive/usreldoc.py
deleted file mode 100644
index 6c238f9..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/usreldoc.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for U.S. related documents (post-2005 patents only)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "usreldoc.tsv.zip"
-f_name = "usreldoc.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/wipo.py b/03_bulk_download_read_in/Python Scripts/archive/wipo.py
deleted file mode 100644
index 4e9e970..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/wipo.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for WIPO technology fields for all patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "wipo.tsv.zip"
-f_name = "wipo.tsv"
-zf = zip.ZipFile(file_name)
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/archive/wipo_field.py b/03_bulk_download_read_in/Python Scripts/archive/wipo_field.py
deleted file mode 100644
index 8df42ff..0000000
--- a/03_bulk_download_read_in/Python Scripts/archive/wipo_field.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Lookup table of WIPO technology fields
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "wipo_field.tsv.zip"
-f_name = "wipo_field.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/g_applicant_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_applicant_not_disambiguated.py
deleted file mode 100644
index d5b42dc..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_applicant_not_disambiguated.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_applicant_not_disambiguated.tsv.zip"
-f_name = "g_applicant_not_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_application.py b/03_bulk_download_read_in/Python Scripts/g_application.py
deleted file mode 100644
index c8d062b..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_application.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_application.tsv.zip"
-f_name = "g_application.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_assignee_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_assignee_disambiguated.py
deleted file mode 100644
index dec2f55..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_assignee_disambiguated.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_assignee_disambiguated.tsv.zip"
-f_name = "g_assignee_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_assignee_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_assignee_not_disambiguated.py
deleted file mode 100644
index 7eb3a85..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_assignee_not_disambiguated.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw assignee information as it appears in the source text and XML files
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_assignee_not_disambiguated.tsv.zip"
-f_name = "g_assignee_not_disambiguated.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_attorney_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_attorney_disambiguated.py
deleted file mode 100644
index 69be381..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_attorney_disambiguated.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for Disambiguated lawyer data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "g_attorney_disambiguated.tsv.zip"
-f_name = "g_attorney_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/g_attorney_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_attorney_not_disambiguated.py
deleted file mode 100644
index 9247fb8..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_attorney_not_disambiguated.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_attorney_not_disambiguated.tsv.zip"
-f_name = "g_attorney_not_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_botanic.py b/03_bulk_download_read_in/Python Scripts/g_botanic.py
deleted file mode 100644
index db37c46..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_botanic.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_botanic.tsv.zip"
-f_name = "g_botanic.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/g_cpc_title.py b/03_bulk_download_read_in/Python Scripts/g_cpc_title.py
deleted file mode 100644
index 01fd94e..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_cpc_title.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_cpc_title.tsv.zip"
-f_name = "g_cpc_title.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_examiner_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_examiner_not_disambiguated.py
deleted file mode 100644
index 39e7553..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_examiner_not_disambiguated.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw examiner information
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_examiner_not_disambiguated.tsv.zip"
-f_name = "g_examiner_not_disambiguated.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_figures.py b/03_bulk_download_read_in/Python Scripts/g_figures.py
deleted file mode 100644
index a3fbdea..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_figures.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "g_figures.tsv.zip"
-f_name = "g_figures.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(include='float64'))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/g_foreign_citation.py b/03_bulk_download_read_in/Python Scripts/g_foreign_citation.py
deleted file mode 100644
index 06286c0..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_foreign_citation.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Citations made to foreign patents by US patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_foreign_citation.tsv.zip"
-f_name = "g_foreign_citation.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 15*(10 ** 5)
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_foreign_priority.py b/03_bulk_download_read_in/Python Scripts/g_foreign_priority.py
deleted file mode 100644
index 964fca9..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_foreign_priority.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Foreign priority data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_foreign_priority.tsv.zip"
-f_name = "g_foreign_priority.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_gov_interest.py b/03_bulk_download_read_in/Python Scripts/g_gov_interest.py
deleted file mode 100644
index d635334..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_gov_interest.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for Raw government interest statements on all patents (where available)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "g_gov_interest.tsv.zip"
-f_name = "g_gov_interest.tsv"
-
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-print(df.head())
-
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_gov_interest_org.py b/03_bulk_download_read_in/Python Scripts/g_gov_interest_org.py
deleted file mode 100644
index 01b2695..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_gov_interest_org.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for Metadata table with patent-to-organization relationships linked to the government_organization table
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "g_gov_interest_org.tsv.zip"
-f_name = "g_gov_interest_org.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_inventor_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_inventor_disambiguated.py
deleted file mode 100644
index 1a406d6..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_inventor_disambiguated.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Metadata table for many-to-many relationships
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_inventor_disambiguated.tsv.zip"
-f_name = "g_inventor_disambiguated.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_inventor_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_inventor_not_disambiguated.py
deleted file mode 100644
index a416d9c..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_inventor_not_disambiguated.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw inventor information as it appears in the source text and XML files
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_inventor_not_disambiguated.tsv.zip"
-f_name = "g_inventor_not_disambiguated.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_ipc_at_issue.py b/03_bulk_download_read_in/Python Scripts/g_ipc_at_issue.py
deleted file mode 100644
index 12a39a3..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_ipc_at_issue.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for International Patent Classification data for all patents (as of publication date)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_ipc_at_issue.tsv.zip"
-f_name = "g_ipc_at_issue.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 15*(10 ** 5)
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_location_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_location_disambiguated.py
deleted file mode 100644
index b23e0c8..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_location_disambiguated.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for Disambiguated location data, including latitude and longitude
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "g_location_disambiguated.tsv.zip"
-f_name = "g_location_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/g_location_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_location_not_disambiguated.py
deleted file mode 100644
index a92743e..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_location_not_disambiguated.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_location_not_disambiguated.tsv.zip"
-f_name = "g_location_not_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
- chunksize = 15*(10 ** 5)
- count = 1
- n_obs = 0
- dtype={'sequence': int}
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting= csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/g_other_reference.py b/03_bulk_download_read_in/Python Scripts/g_other_reference.py
deleted file mode 100644
index f1a1989..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_other_reference.py
+++ /dev/null
@@ -1,33 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_other_reference.tsv.zip"
-f_name = "g_other_reference.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-chunksize = 15*(10 ** 5)
-count = 1
-n_obs = 0
-dtype={'sequence': int}
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/g_patent.py b/03_bulk_download_read_in/Python Scripts/g_patent.py
deleted file mode 100644
index 733472e..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_patent.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_patent.tsv.zip"
-f_name = "g_patent.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
- chunksize = 15*(10 ** 5)
- count = 1
- n_obs = 0
- dtype={'sequence': int}
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/g_pct_data.py b/03_bulk_download_read_in/Python Scripts/g_pct_data.py
deleted file mode 100644
index 46034c2..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_pct_data.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for PCT data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "g_pct_data.tsv.zip"
-f_name = "g_pct_data.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_persistent_assignee.py b/03_bulk_download_read_in/Python Scripts/g_persistent_assignee.py
deleted file mode 100644
index 4404169..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_persistent_assignee.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Persistant Assignee Disambiguation
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_persistent_assignee.tsv.zip"
-f_name = "g_persistent_assignee.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of columns, observations, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_persistent_inventor.py b/03_bulk_download_read_in/Python Scripts/g_persistent_inventor.py
deleted file mode 100644
index a3f8cab..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_persistent_inventor.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Persistant Inventor Disambiguation
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_persistent_inventor.tsv.zip"
-f_name = "g_persistent_inventor.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_rel_app_text.py b/03_bulk_download_read_in/Python Scripts/g_rel_app_text.py
deleted file mode 100644
index 5845e89..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_rel_app_text.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "g_rel_app_text.tsv.zip"
-f_name = "g_rel_app_text.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/g_us_application_citation.py b/03_bulk_download_read_in/Python Scripts/g_us_application_citation.py
deleted file mode 100644
index 9ce0206..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_us_application_citation.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Citations made to US patent applications by US patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_us_application_citation.tsv.zip"
-f_name = "g_us_application_citation.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 15*(10 ** 5)
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_us_patent_citation.py b/03_bulk_download_read_in/Python Scripts/g_us_patent_citation.py
deleted file mode 100644
index b0f0681..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_us_patent_citation.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for Citations made to US granted patents by US patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_us_patent_citation.tsv.zip"
-f_name = "g_us_patent_citation.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 2*(10 ** 6)
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/g_us_rel_doc.py b/03_bulk_download_read_in/Python Scripts/g_us_rel_doc.py
deleted file mode 100644
index afc1ff1..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_us_rel_doc.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for U.S. related documents (post-2005 patents only)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_us_rel_doc.tsv.zip"
-f_name = "g_us_rel_doc.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/g_us_term_of_grant.py b/03_bulk_download_read_in/Python Scripts/g_us_term_of_grant.py
deleted file mode 100644
index 9e6d9ae..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_us_term_of_grant.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for U.S. term of grant data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "g_us_term_of_grant.tsv.zip"
-f_name = "g_us_term_of_grant.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/g_uspc_at_issue.py b/03_bulk_download_read_in/Python Scripts/g_uspc_at_issue.py
deleted file mode 100644
index 79cbad3..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_uspc_at_issue.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for USPC classification data for all patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_uspc_at_issue.tsv.zip"
-f_name = "g_uspc_at_issue.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/g_wipo_technology.py b/03_bulk_download_read_in/Python Scripts/g_wipo_technology.py
deleted file mode 100644
index 9435a51..0000000
--- a/03_bulk_download_read_in/Python Scripts/g_wipo_technology.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for WIPO technology fields for all patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "g_wipo_technology.tsv.zip"
-f_name = "g_wipo_technology.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-
-
-
diff --git a/03_bulk_download_read_in/Python Scripts/pg_applicant_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_applicant_not_disambiguated.py
deleted file mode 100644
index 19449a2..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_applicant_not_disambiguated.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_applicant_not_disambiguated.tsv.zip"
-f_name = "pg_applicant_not_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_assignee_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_assignee_disambiguated.py
deleted file mode 100644
index c52ca98..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_assignee_disambiguated.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_assignee_disambiguated.tsv.zip"
-f_name = "pg_assignee_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_assignee_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_assignee_not_disambiguated.py
deleted file mode 100644
index a49f576..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_assignee_not_disambiguated.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw assignee information as it appears in the source text and XML files
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "pg_assignee_not_disambiguated.tsv.zip"
-f_name = "pg_assignee_not_disambiguated.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/pg_cpc_at_issue.py b/03_bulk_download_read_in/Python Scripts/pg_cpc_at_issue.py
deleted file mode 100644
index cbd8097..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_cpc_at_issue.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_cpc_at_issue.tsv.zip"
-f_name = "pg_cpc_at_issue.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_cpc_current.py b/03_bulk_download_read_in/Python Scripts/pg_cpc_current.py
deleted file mode 100644
index 9d9ed78..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_cpc_current.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_cpc_current.tsv.zip"
-f_name = "pg_cpc_current.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_cpc_title.py b/03_bulk_download_read_in/Python Scripts/pg_cpc_title.py
deleted file mode 100644
index 9cda79e..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_cpc_title.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_cpc_title.tsv.zip"
-f_name = "pg_cpc_title.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_foreign_priority.py b/03_bulk_download_read_in/Python Scripts/pg_foreign_priority.py
deleted file mode 100644
index 7c50401..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_foreign_priority.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for Foreign priority data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "pg_foreign_priority.tsv.zip"
-f_name = "pg_foreign_priority.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_gov_interest.py b/03_bulk_download_read_in/Python Scripts/pg_gov_interest.py
deleted file mode 100644
index cf4f32c..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_gov_interest.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for Raw government interest statements on all patents (where available)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "pg_gov_interest.tsv.zip"
-f_name = "pg_gov_interest.tsv"
-
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-print(df.head())
-
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_granted_pgpubs_crosswalk.py b/03_bulk_download_read_in/Python Scripts/pg_granted_pgpubs_crosswalk.py
deleted file mode 100644
index 907752e..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_granted_pgpubs_crosswalk.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_granted_pgpubs_crosswalk.tsv.zip"
-f_name = "pg_granted_pgpubs_crosswalk.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/pg_inventor_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_inventor_disambiguated.py
deleted file mode 100644
index ec5926c..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_inventor_disambiguated.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for Disambiguated inventor data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "pg_inventor_disambiguated.tsv.zip"
-f_name = "pg_inventor_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_inventor_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_inventor_not_disambiguated.py
deleted file mode 100644
index e6b5776..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_inventor_not_disambiguated.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for Raw inventor information as it appears in the source text and XML files
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "pg_inventor_not_disambiguated.tsv.zip"
-f_name = "pg_inventor_not_disambiguated.tsv"
-with zip.ZipFile(file_name) as zf:
-chunksize = 10 ** 6
-count = 1
-n_obs = 0
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/pg_ipc_at_issue.py b/03_bulk_download_read_in/Python Scripts/pg_ipc_at_issue.py
deleted file mode 100644
index b97e1d4..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_ipc_at_issue.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#Read-in script for International Patent Classification data for all patents (as of publication date)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "pg_ipc_at_issue.tsv.zip"
-f_name = "pg_ipc_at_issue.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 15*(10 ** 5)
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/pg_location_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_location_disambiguated.py
deleted file mode 100644
index 994464e..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_location_disambiguated.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for Disambiguated location data, including latitude and longitude
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "pg_location_disambiguated.tsv.zip"
-f_name = "pg_location_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/pg_location_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_location_not_disambiguated.py
deleted file mode 100644
index d11d3d7..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_location_not_disambiguated.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_location_not_disambiguated.tsv.zip"
-f_name = "pg_location_not_disambiguated.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
- chunksize = 15*(10 ** 5)
- count = 1
- n_obs = 0
- dtype={'sequence': int}
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting= csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-print(df.describe(exclude=[np.number]))
diff --git a/03_bulk_download_read_in/Python Scripts/pg_pct_data.py b/03_bulk_download_read_in/Python Scripts/pg_pct_data.py
deleted file mode 100644
index 9d5ccfa..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_pct_data.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#Read-in script for PCT data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "pg_pct_data.tsv.zip"
-f_name = "pg_pct_data.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-# Print first five observations
-print(df.head())
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Provide additional information on certain variables.
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_published_application.py b/03_bulk_download_read_in/Python Scripts/pg_published_application.py
deleted file mode 100644
index 245cc02..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_published_application.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_published_application.tsv.zip"
-f_name = "pg_published_application.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_rel_app_text.py b/03_bulk_download_read_in/Python Scripts/pg_rel_app_text.py
deleted file mode 100644
index 3216619..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_rel_app_text.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for Number of figures and sheets
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "pg_rel_app_text.tsv.zip"
-f_name = "pg_rel_app_text.tsv"
-# Selecting the zip file.
-with zip.ZipFile(file_name) as zf:
-# Reading the selected file in the zip.
- with zf.open(f_name) as openfile:
- df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC)
-
-# Print first five observations
-df.head()
-# Print summary of data: number of columns, observations, and each variable data type
-print(len(df))
-df.info()
-# Print basic summary statistics for numerical variables
-print(df.describe(exclude=[np.number]))
\ No newline at end of file
diff --git a/03_bulk_download_read_in/Python Scripts/pg_uspc_at_issue.py b/03_bulk_download_read_in/Python Scripts/pg_uspc_at_issue.py
deleted file mode 100644
index c524133..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_uspc_at_issue.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#Read-in script for USPC classification data for all patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "pg_uspc_at_issue.tsv.zip"
-f_name = "pg_uspc_at_issue.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
diff --git a/03_bulk_download_read_in/Python Scripts/pg_wipo_technology.py b/03_bulk_download_read_in/Python Scripts/pg_wipo_technology.py
deleted file mode 100644
index d5657d5..0000000
--- a/03_bulk_download_read_in/Python Scripts/pg_wipo_technology.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Read-in script for WIPO technology fields for all patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "pg_wipo_technology.tsv.zip"
-f_name = "pg_wipo_technology.tsv"
-with zip.ZipFile(file_name) as zf:
- chunksize = 10 ** 6
- count = 1
- n_obs = 0
- for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
-
-
-
diff --git a/03_bulk_download_read_in/R Scripts/README.md b/03_bulk_download_read_in/R Scripts/README.md
deleted file mode 100644
index 8d6e135..0000000
--- a/03_bulk_download_read_in/R Scripts/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# PatentsView-Code-Snippets
-
-# Bulk Download Files: R Read-in Scripts
-
-Below is a list of all bulk download files and information on whether or not there is a template read-in script currently available.
-
-If the script for a file is not currently available, other scripts in this repository can be used as reference. The files are all structured in the same manner so you should be able to use a template from a different file to help determine how to proceed.
-
-
-
-List of Resources:
-
-All R scripts were created using R 3.5.2 and R 4.0
-
-Necessary Packages:
-
-- data.table
-
-
-| Bulk Download File | Status of Script |
-| --- |--- |
-| application | *Available* |
-| assignee | *Available* |
-| botanic | *Available* |
-| brf_sum_text | *In Progress* |
-| claim | *Available* |
-| cpc_current | *Available* |
-| cpc_group | *Available* |
-| cpc_subgroup | *Available* |
-| cpc_subsection | *Available* |
-| draw_desc_text | *In Progress* |
-| detail_desc_text | *In Progress* |
-| foreign_priority | *Available* |
-| figures | *Available* |
-| foreigncitation | *Available* |
-| government_interest | *Available* |
-| government_organization | *Available* |
-| inventor | *Available* |
-| inventor_gender | *Available* |
-| ipcr | *Available* |
-| lawyer | *Available* |
-| location | *Available* |
-| location_assignee | *Available* |
-| location_inventor | *In Progress* |
-| mainclass | *Available* |
-| mainclass_current | *Available* |
-| nber | *Available* |
-| nber_category | *Available* |
-| nber_subcategory | *Available* |
-| non_inventor_applicant | *Available* |
-| otherreference | *Available* |
-| patent | *Available* |
-| patent_assignee | *In Progress* |
-| patent_contractawardnumber | *Available* |
-| patent_govintorg | *Available* |
-| patent_inventor | *Available* |
-| patent_lawyer | *Available* |
-| pct_data | *Available* |
-| persistent_assignee_disambig | *Available* |
-| persistent_inventor_disambig | *Available* |
-| rawassignee | *Available* |
-| rawexaminer | *Available* |
-| rawinventor | *Available* |
-| rawlawyer | *Available* |
-| rawlocation | *Available* |
-| rel_app_text | *Available* |
-| subclass | *Available* |
-| subclass_current | *Available* |
-| us_term_of_grant | *Available* |
-| usapplicationcitation | *In Progress* |
-| uspatentcitation | *In Progress* |
-| uspc | *Available* |
-| uspc_current | *Available* |
-| usreldoc | *Available* |
-| wipo | *Available* |
-| wipo_field | *Available* |
\ No newline at end of file
diff --git a/03_bulk_download_read_in/R Scripts/archive/assignee.rmd b/03_bulk_download_read_in/R Scripts/archive/assignee.rmd
deleted file mode 100644
index a971b18..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/assignee.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for assignee data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("assignee.tsv.zip", "assignee.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/botanic.rmd b/03_bulk_download_read_in/R Scripts/archive/botanic.rmd
deleted file mode 100644
index 5679f67..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/botanic.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for botanic data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("botanic.tsv.zip", "botanic.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1976.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1976.Rmd
deleted file mode 100644
index eb5b2ad..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1976.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1976 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1976.tsv.zip", "claims_1976.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1977.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1977.Rmd
deleted file mode 100644
index 51869ce..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1977.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1977 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1977.tsv.zip", "claims_1977.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1978.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1978.Rmd
deleted file mode 100644
index 1d3df6e..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1978.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1978 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1978.tsv.zip", "claims_1978.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1979.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1979.Rmd
deleted file mode 100644
index b3efe50..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1979.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1979 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1979.tsv.zip", "claims_1979.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1980.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1980.Rmd
deleted file mode 100644
index 3951c4a..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1980.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1980 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1980.tsv.zip", "claims_1980.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1981.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1981.Rmd
deleted file mode 100644
index 106eab3..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1981.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1981 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1981.tsv.zip", "claims_1981.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1982.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1982.Rmd
deleted file mode 100644
index aaceec3..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1982.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1982 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1982.tsv.zip", "claims_1982.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1983.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1983.Rmd
deleted file mode 100644
index 42cf546..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1983.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1983 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1983.tsv.zip", "claims_1983.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1984.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1984.Rmd
deleted file mode 100644
index 8c3245e..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1984.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1984 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1984.tsv.zip", "claims_1984.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1985.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1985.Rmd
deleted file mode 100644
index 014d9dc..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1985.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1985 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1985.tsv.zip", "claims_1985.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1986.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1986.Rmd
deleted file mode 100644
index 77bb994..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1986.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1986 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1986.tsv.zip", "claims_1986.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1987.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1987.Rmd
deleted file mode 100644
index f7a4e31..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1987.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1987 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1987.tsv.zip", "claims_1987.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1988.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1988.Rmd
deleted file mode 100644
index 2a9fb76..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1988.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1988 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1988.tsv.zip", "claims_1988.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1989.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1989.Rmd
deleted file mode 100644
index 815952c..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1989.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1989 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1989.tsv.zip", "claims_1989.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1990.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1990.Rmd
deleted file mode 100644
index c437037..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1990.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1990 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1990.tsv.zip", "claims_1990.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1991.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1991.Rmd
deleted file mode 100644
index b7f44ce..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1991.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1991 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1991.tsv.zip", "claims_1991.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1992.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1992.Rmd
deleted file mode 100644
index b8d8e54..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1992.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1992 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1992.tsv.zip", "claims_1992.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1993.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1993.Rmd
deleted file mode 100644
index 8f46b84..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1993.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1993 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1993.tsv.zip", "claims_1993.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1994.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1994.Rmd
deleted file mode 100644
index 621a82a..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1994.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1994 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1994.tsv.zip", "claims_1994.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1995.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1995.Rmd
deleted file mode 100644
index 8564015..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1995.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1995 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1995.tsv.zip", "claims_1995.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1996.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1996.Rmd
deleted file mode 100644
index bf6fa74..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1996.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1996 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1996.tsv.zip", "claims_1996.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1997.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1997.Rmd
deleted file mode 100644
index 358e9f1..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1997.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1997 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1997.tsv.zip", "claims_1997.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1998.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1998.Rmd
deleted file mode 100644
index 13bff46..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1998.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1998 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1998.tsv.zip", "claims_1998.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1999.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1999.Rmd
deleted file mode 100644
index 264c005..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1999.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 1999 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_1999.tsv.zip", "claims_1999.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2000.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2000.Rmd
deleted file mode 100644
index eab6314..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2000.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2000 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2000.tsv.zip", "claims_2000.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2001.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2001.Rmd
deleted file mode 100644
index 9d357ef..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2001.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2001 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2001.tsv.zip", "claims_2001.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2002.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2002.Rmd
deleted file mode 100644
index 97542b0..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2002.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2002 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2002.tsv.zip", "claims_2002.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2003.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2003.Rmd
deleted file mode 100644
index 7967c8d..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2003.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2003 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2003.tsv.zip", "claims_2003.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2004.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2004.Rmd
deleted file mode 100644
index 4076ceb..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2004.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2004 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2004.tsv.zip", "claims_2004.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2005.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2005.Rmd
deleted file mode 100644
index 2947ad7..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2005.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2005 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2005.tsv.zip", "claims_2005.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2006.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2006.Rmd
deleted file mode 100644
index bd9fb5f..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2006.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2006 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2006.tsv.zip", "claims_2006.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2007.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2007.Rmd
deleted file mode 100644
index 7020d50..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2007.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2007 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2007.tsv.zip", "claims_2007.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2008.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2008.Rmd
deleted file mode 100644
index 5bdf2d7..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2008.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2008 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2008.tsv.zip", "claims_2008.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2009.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2009.Rmd
deleted file mode 100644
index feefdc3..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2009.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2009 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2009.tsv.zip", "claims_2009.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2010.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2010.Rmd
deleted file mode 100644
index 6aa86f4..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2010.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2010 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2010.tsv.zip", "claims_2010.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2011.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2011.Rmd
deleted file mode 100644
index bc485a8..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2011.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2011 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2011.tsv.zip", "claims_2011.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2012.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2012.Rmd
deleted file mode 100644
index 4c4d00b..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2012.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2012 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2012.tsv.zip", "claims_2012.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2013.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2013.Rmd
deleted file mode 100644
index d64f61c..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2013.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2013 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2013.tsv.zip", "claims_2013.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2014.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2014.Rmd
deleted file mode 100644
index 2b7b32a..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2014.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2014 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2014.tsv.zip", "claims_2014.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2015.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2015.Rmd
deleted file mode 100644
index e202fc1..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2015.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2015 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2015.tsv.zip", "claims_2015.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2016.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2016.Rmd
deleted file mode 100644
index 1f458ed..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2016.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2016 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2016.tsv.zip", "claims_2016.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2017.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2017.Rmd
deleted file mode 100644
index c4a60c4..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2017.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2017 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2017.tsv.zip", "claims_2017.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2018.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2018.Rmd
deleted file mode 100644
index 871d569..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2018.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2018 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2018.tsv.zip", "claims_2018.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2019.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2019.Rmd
deleted file mode 100644
index c4bd876..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2019.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2019 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2019.tsv.zip", "claims_2019.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2020.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2020.Rmd
deleted file mode 100644
index c27fc25..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2020.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for 2020 claims data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("claims_2020.tsv.zip", "claims_2020.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_current.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_current.rmd
deleted file mode 100644
index 1366d3d..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/cpc_current.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_current data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("cpc_current.tsv.zip", "cpc_current.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_group.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_group.rmd
deleted file mode 100644
index ed24e64..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/cpc_group.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_group data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("cpc_group.tsv.zip", "cpc_group.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_subgroup.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_subgroup.rmd
deleted file mode 100644
index 57d8554..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/cpc_subgroup.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_subgroup data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("cpc_subgroup.tsv.zip", "cpc_subgroup.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_subsection.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_subsection.rmd
deleted file mode 100644
index fb65cc6..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/cpc_subsection.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_subsection data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("cpc_subsection.tsv.zip", "cpc_subsection.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/figures.Rmd b/03_bulk_download_read_in/R Scripts/archive/figures.Rmd
deleted file mode 100644
index 092ddd8..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/figures.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for figures data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("figures.tsv.zip", "figures.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/foreign_priority.Rmd b/03_bulk_download_read_in/R Scripts/archive/foreign_priority.Rmd
deleted file mode 100644
index 1f55108..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/foreign_priority.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for foreign_priority data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("foreign_priority.tsv.zip", "foreign_priority.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/foreigncitation.Rmd b/03_bulk_download_read_in/R Scripts/archive/foreigncitation.Rmd
deleted file mode 100644
index 9ee92da..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/foreigncitation.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for foreign citation data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("foreigncitation.tsv.zip", "foreigncitation.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/government_interest.Rmd b/03_bulk_download_read_in/R Scripts/archive/government_interest.Rmd
deleted file mode 100644
index 0e92cb1..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/government_interest.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for government_interest data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("government_interest.tsv.zip", "government_interest.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/government_organization.Rmd b/03_bulk_download_read_in/R Scripts/archive/government_organization.Rmd
deleted file mode 100644
index efb2775..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/government_organization.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for government_organization data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("government_organization.tsv.zip", "government_organization.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/inventor.Rmd b/03_bulk_download_read_in/R Scripts/archive/inventor.Rmd
deleted file mode 100644
index 56d42ee..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/inventor.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("inventor.tsv.zip", "inventor.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/inventor_gender.rmd b/03_bulk_download_read_in/R Scripts/archive/inventor_gender.rmd
deleted file mode 100644
index 10e5b26..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/inventor_gender.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for inventor_gender data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("inventor_gender.tsv.zip", "inventor_gender.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/ipcr.Rmd b/03_bulk_download_read_in/R Scripts/archive/ipcr.Rmd
deleted file mode 100644
index b34cceb..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/ipcr.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for ipcr data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("ipcr.tsv.zip", "ipcr.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/lawyer.Rmd b/03_bulk_download_read_in/R Scripts/archive/lawyer.Rmd
deleted file mode 100644
index 136aa95..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/lawyer.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for lawyer data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("lawyer.tsv.zip", "lawyer.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/location.Rmd b/03_bulk_download_read_in/R Scripts/archive/location.Rmd
deleted file mode 100644
index f6b8261..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/location.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for location data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("location.tsv.zip", "location.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/location_assignee.Rmd b/03_bulk_download_read_in/R Scripts/archive/location_assignee.Rmd
deleted file mode 100644
index 8d45d29..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/location_assignee.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for location_assignee data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("location_assignee.tsv.zip", "location_assignee.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/mainclass.rmd b/03_bulk_download_read_in/R Scripts/archive/mainclass.rmd
deleted file mode 100644
index ee2881b..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/mainclass.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for main class data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("mainclass.tsv.zip", "mainclass.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/mainclass_current.Rmd b/03_bulk_download_read_in/R Scripts/archive/mainclass_current.Rmd
deleted file mode 100644
index 27bf95a..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/mainclass_current.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for mainclass_current data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("mainclass_current.tsv.zip", "mainclass_current.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/nber.rmd b/03_bulk_download_read_in/R Scripts/archive/nber.rmd
deleted file mode 100644
index 891ed45..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/nber.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for nber data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("nber.tsv.zip", "nber.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/nber_category.rmd b/03_bulk_download_read_in/R Scripts/archive/nber_category.rmd
deleted file mode 100644
index 29e82a9..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/nber_category.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for nber_category data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("nber_category.tsv.zip", "nber_category.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/nber_subcategory.rmd b/03_bulk_download_read_in/R Scripts/archive/nber_subcategory.rmd
deleted file mode 100644
index bb41d7d..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/nber_subcategory.rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for nber_subcategory data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("nber_subcategory.tsv.zip", "nber_subcategory.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/non_inventor_applicant.Rmd b/03_bulk_download_read_in/R Scripts/archive/non_inventor_applicant.Rmd
deleted file mode 100644
index 6b6e0f8..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/non_inventor_applicant.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for non_inventor_applicant data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("non_inventor_applicant.tsv.zip", "non_inventor_applicant.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/otherreference.Rmd b/03_bulk_download_read_in/R Scripts/archive/otherreference.Rmd
deleted file mode 100644
index 8d15609..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/otherreference.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for other reference data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("otherreference.tsv.zip", "otherreference.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/patent.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent.Rmd
deleted file mode 100644
index 0671d52..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/patent.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("patent.tsv.zip", "patent.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_contractawardnumber.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_contractawardnumber.Rmd
deleted file mode 100644
index d3b2b1e..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/patent_contractawardnumber.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent_contractawardnumber data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("patent_contractawardnumber.tsv.zip", "patent_contractawardnumber.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_govintorg.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_govintorg.Rmd
deleted file mode 100644
index a740e4e..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/patent_govintorg.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent_govintorg data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("patent_govintorg.tsv.zip", "patent_govintorg.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_inventor.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_inventor.Rmd
deleted file mode 100644
index 8056a85..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/patent_inventor.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent_inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("patent_inventor.tsv.zip", "patent_inventor.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_lawyer.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_lawyer.Rmd
deleted file mode 100644
index d9d22fb..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/patent_lawyer.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent_lawyer data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("patent_lawyer.tsv.zip", "patent_lawyer.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/pct_data.Rmd b/03_bulk_download_read_in/R Scripts/archive/pct_data.Rmd
deleted file mode 100644
index 4894b62..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/pct_data.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for pct_data data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pct_data.tsv.zip", "pct_data.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/persistent_assignee_disambig.Rmd b/03_bulk_download_read_in/R Scripts/archive/persistent_assignee_disambig.Rmd
deleted file mode 100644
index 9fb98b8..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/persistent_assignee_disambig.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for persistent_assignee_disambig data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("persistent_assignee_disambig.tsv.zip", "persistent_assignee_disambig.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/persistent_inventor_disambig.Rmd b/03_bulk_download_read_in/R Scripts/archive/persistent_inventor_disambig.Rmd
deleted file mode 100644
index 2ccace0..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/persistent_inventor_disambig.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for persistent_inventor_disambig data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("persistent_inventor_disambig.tsv.zip", "persistent_inventor_disambig.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/rawassignee.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawassignee.Rmd
deleted file mode 100644
index 7947d52..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/rawassignee.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw assignee data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("rawassignee.tsv.zip", "rawassignee.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/rawexaminer.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawexaminer.Rmd
deleted file mode 100644
index b980b03..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/rawexaminer.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw examiner data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("rawexaminer.tsv.zip", "rawexaminer.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/rawinventor.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawinventor.Rmd
deleted file mode 100644
index 20c561a..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/rawinventor.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("rawinventor.tsv.zip", "rawinventor.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/rawlawyer.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawlawyer.Rmd
deleted file mode 100644
index 0ccb7b2..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/rawlawyer.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw lawyer data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("rawlawyer.tsv.zip", "rawlawyer.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/rawlocation.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawlocation.Rmd
deleted file mode 100644
index a7f75a9..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/rawlocation.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw location data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("rawlocation.tsv.zip", "rawlocation.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/rel_app_text.Rmd b/03_bulk_download_read_in/R Scripts/archive/rel_app_text.Rmd
deleted file mode 100644
index 344f376..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/rel_app_text.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for rel_app_text data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("rel_app_text.tsv.zip", "rel_app_text.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/subclass.Rmd b/03_bulk_download_read_in/R Scripts/archive/subclass.Rmd
deleted file mode 100644
index 7bc2171..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/subclass.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for subclass data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("subclass.tsv.zip", "subclass.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/subclass_current.Rmd b/03_bulk_download_read_in/R Scripts/archive/subclass_current.Rmd
deleted file mode 100644
index b963117..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/subclass_current.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for subclass_current data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("subclass_current.tsv.zip", "subclass_current.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/us_term_of_grant.Rmd b/03_bulk_download_read_in/R Scripts/archive/us_term_of_grant.Rmd
deleted file mode 100644
index 2bd922b..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/us_term_of_grant.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for us_term_of_grant data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("us_term_of_grant.tsv.zip", "us_term_of_grant.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/usapplicationcitation.Rmd b/03_bulk_download_read_in/R Scripts/archive/usapplicationcitation.Rmd
deleted file mode 100644
index f8d0946..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/usapplicationcitation.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for us application citation data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("usapplicationcitation.tsv.zip", "usapplicationcitation.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/uspatentcitation.Rmd b/03_bulk_download_read_in/R Scripts/archive/uspatentcitation.Rmd
deleted file mode 100644
index e493cfa..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/uspatentcitation.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for us patent citation data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("uspatentcitation.tsv.zip", "uspatentcitation.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/uspc.Rmd b/03_bulk_download_read_in/R Scripts/archive/uspc.Rmd
deleted file mode 100644
index 9865a3c..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/uspc.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for uspc data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("uspc.tsv.zip", "uspc.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/uspc_current.Rmd b/03_bulk_download_read_in/R Scripts/archive/uspc_current.Rmd
deleted file mode 100644
index 64ab8c7..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/uspc_current.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for uspc_current data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("uspc_current.tsv.zip", "uspc_current.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/usreldoc.Rmd b/03_bulk_download_read_in/R Scripts/archive/usreldoc.Rmd
deleted file mode 100644
index f2c6bfa..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/usreldoc.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for usreldoc data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("usreldoc.tsv.zip", "usreldoc.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/wipo.Rmd b/03_bulk_download_read_in/R Scripts/archive/wipo.Rmd
deleted file mode 100644
index 0ea7797..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/wipo.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for wipo data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("wipo.tsv.zip", "wipo.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/archive/wipo_field.Rmd b/03_bulk_download_read_in/R Scripts/archive/wipo_field.Rmd
deleted file mode 100644
index 527811f..0000000
--- a/03_bulk_download_read_in/R Scripts/archive/wipo_field.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for wipo_field data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("wipo_field.tsv.zip", "wipo_field.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_applicant_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_applicant_not_disambiguated.Rmd
deleted file mode 100644
index a9c3b51..0000000
--- a/03_bulk_download_read_in/R Scripts/g_applicant_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for non_inventor_applicant data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_applicant_not_disambiguated.tsv.zip", "g_applicant_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_application.Rmd b/03_bulk_download_read_in/R Scripts/g_application.Rmd
deleted file mode 100644
index 109ad25..0000000
--- a/03_bulk_download_read_in/R Scripts/g_application.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_application.tsv.zip", "g_application.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_assignee_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_assignee_disambiguated.Rmd
deleted file mode 100644
index 7c4ce10..0000000
--- a/03_bulk_download_read_in/R Scripts/g_assignee_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for assignee data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_assignee_disambiguated.tsv.zip", "g_assignee_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_assignee_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_assignee_not_disambiguated.Rmd
deleted file mode 100644
index 9002006..0000000
--- a/03_bulk_download_read_in/R Scripts/g_assignee_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw assignee data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_assignee_not_disambiguated.tsv.zip", "g_assignee_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_attorney_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_attorney_disambiguated.Rmd
deleted file mode 100644
index ae9fc12..0000000
--- a/03_bulk_download_read_in/R Scripts/g_attorney_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for lawyer data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_attorney_disambiguated.tsv.zip", "g_attorney_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_attorney_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_attorney_not_disambiguated.Rmd
deleted file mode 100644
index c8d74eb..0000000
--- a/03_bulk_download_read_in/R Scripts/g_attorney_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw lawyer data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_attorney_not_disambiguated.tsv.zip", "g_attorney_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_botanic.Rmd b/03_bulk_download_read_in/R Scripts/g_botanic.Rmd
deleted file mode 100644
index e70b50e..0000000
--- a/03_bulk_download_read_in/R Scripts/g_botanic.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for botanic data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_botanic.tsv.zip", "g_botanic.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_cpc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/g_cpc_at_issue.Rmd
deleted file mode 100644
index 7564e1f..0000000
--- a/03_bulk_download_read_in/R Scripts/g_cpc_at_issue.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_current data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_cpc_at_issue.tsv.zip", "g_cpc_at_issue.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_cpc_title.Rmd b/03_bulk_download_read_in/R Scripts/g_cpc_title.Rmd
deleted file mode 100644
index 98e259b..0000000
--- a/03_bulk_download_read_in/R Scripts/g_cpc_title.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_group data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_cpc_title.tsv.zip", "g_cpc_title.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_examiner_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_examiner_not_disambiguated.Rmd
deleted file mode 100644
index 26cc21e..0000000
--- a/03_bulk_download_read_in/R Scripts/g_examiner_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw examiner data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_examiner_not_disambiguated.tsv.zip", "g_examiner_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_figures.Rmd b/03_bulk_download_read_in/R Scripts/g_figures.Rmd
deleted file mode 100644
index a128454..0000000
--- a/03_bulk_download_read_in/R Scripts/g_figures.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for figures data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_figures.tsv.zip", "g_figures.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_foreign_citation.Rmd b/03_bulk_download_read_in/R Scripts/g_foreign_citation.Rmd
deleted file mode 100644
index 6a38b12..0000000
--- a/03_bulk_download_read_in/R Scripts/g_foreign_citation.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for foreign citation data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_foreign_citation.tsv.zip", "g_foreign_citation.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_foreign_priority.Rmd b/03_bulk_download_read_in/R Scripts/g_foreign_priority.Rmd
deleted file mode 100644
index e5827c9..0000000
--- a/03_bulk_download_read_in/R Scripts/g_foreign_priority.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for foreign_priority data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_foreign_priority.tsv.zip", "g_foreign_priority.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_gov_interest.Rmd b/03_bulk_download_read_in/R Scripts/g_gov_interest.Rmd
deleted file mode 100644
index f544175..0000000
--- a/03_bulk_download_read_in/R Scripts/g_gov_interest.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for government_interest data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_gov_interest.tsv.zip", "g_gov_interest.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_gov_interest_contracts.Rmd b/03_bulk_download_read_in/R Scripts/g_gov_interest_contracts.Rmd
deleted file mode 100644
index 42fff94..0000000
--- a/03_bulk_download_read_in/R Scripts/g_gov_interest_contracts.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent_contractawardnumber data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_gov_interest_contracts.tsv.zip", "g_gov_interest_contracts.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_gov_interest_org.Rmd b/03_bulk_download_read_in/R Scripts/g_gov_interest_org.Rmd
deleted file mode 100644
index 61828c6..0000000
--- a/03_bulk_download_read_in/R Scripts/g_gov_interest_org.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for government_interest data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_gov_interest_org.tsv.zip", "g_gov_interest_org.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_inventor_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_inventor_disambiguated.Rmd
deleted file mode 100644
index 1995350..0000000
--- a/03_bulk_download_read_in/R Scripts/g_inventor_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_inventor_disambiguated.tsv.zip", "g_inventor_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_inventor_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_inventor_not_disambiguated.Rmd
deleted file mode 100644
index 1850753..0000000
--- a/03_bulk_download_read_in/R Scripts/g_inventor_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_inventor_not_disambiguated.tsv.zip", "g_inventor_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_ipc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/g_ipc_at_issue.Rmd
deleted file mode 100644
index 0e77ff8..0000000
--- a/03_bulk_download_read_in/R Scripts/g_ipc_at_issue.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for ipcr data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_ipc_at_issue.tsv.zip", "g_ipc_at_issue.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_location_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_location_disambiguated.Rmd
deleted file mode 100644
index 1c6fce5..0000000
--- a/03_bulk_download_read_in/R Scripts/g_location_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for location data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_location_disambiguated.tsv.zip", "g_location_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_location_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_location_not_disambiguated.Rmd
deleted file mode 100644
index a89e549..0000000
--- a/03_bulk_download_read_in/R Scripts/g_location_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for location data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_location_not_disambiguated.tsv.zip", "g_location_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_other_reference.Rmd b/03_bulk_download_read_in/R Scripts/g_other_reference.Rmd
deleted file mode 100644
index f5189db..0000000
--- a/03_bulk_download_read_in/R Scripts/g_other_reference.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for other reference data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_other_reference.tsv.zip", "g_other_reference.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_patent.Rmd b/03_bulk_download_read_in/R Scripts/g_patent.Rmd
deleted file mode 100644
index 166e511..0000000
--- a/03_bulk_download_read_in/R Scripts/g_patent.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_patent.tsv.zip", "g_patent.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_pct_data.Rmd b/03_bulk_download_read_in/R Scripts/g_pct_data.Rmd
deleted file mode 100644
index 2dc323f..0000000
--- a/03_bulk_download_read_in/R Scripts/g_pct_data.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for pct_data data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_pct_data.tsv.zip", "g_pct_data.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_persistent_assignee.Rmd b/03_bulk_download_read_in/R Scripts/g_persistent_assignee.Rmd
deleted file mode 100644
index df0908a..0000000
--- a/03_bulk_download_read_in/R Scripts/g_persistent_assignee.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for persistent_assignee_disambig data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_persistent_assignee.tsv.zip", "g_persistent_assignee.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_persistent_inventor.Rmd b/03_bulk_download_read_in/R Scripts/g_persistent_inventor.Rmd
deleted file mode 100644
index 3bd5872..0000000
--- a/03_bulk_download_read_in/R Scripts/g_persistent_inventor.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for persistent_inventor_disambig data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_persistent_inventor.tsv.zip", "g_persistent_inventor.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_rel_app_text.Rmd b/03_bulk_download_read_in/R Scripts/g_rel_app_text.Rmd
deleted file mode 100644
index 66b0d49..0000000
--- a/03_bulk_download_read_in/R Scripts/g_rel_app_text.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for rel_app_text data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_rel_app_text.tsv.zip", "g_rel_app_text.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_us_application_citation.Rmd b/03_bulk_download_read_in/R Scripts/g_us_application_citation.Rmd
deleted file mode 100644
index 9417c66..0000000
--- a/03_bulk_download_read_in/R Scripts/g_us_application_citation.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for us application citation data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_us_application_citation.tsv.zip", "g_us_application_citation.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_us_patent_citation.Rmd b/03_bulk_download_read_in/R Scripts/g_us_patent_citation.Rmd
deleted file mode 100644
index 6aa206c..0000000
--- a/03_bulk_download_read_in/R Scripts/g_us_patent_citation.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for us patent citation data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_us_patent_citation.tsv.zip", "g_us_patent_citation.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_us_rel_doc.Rmd b/03_bulk_download_read_in/R Scripts/g_us_rel_doc.Rmd
deleted file mode 100644
index a6ed9c5..0000000
--- a/03_bulk_download_read_in/R Scripts/g_us_rel_doc.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for usreldoc data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_us_rel_doc.tsv.zip", "g_us_rel_doc.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_us_term_of_grant.Rmd b/03_bulk_download_read_in/R Scripts/g_us_term_of_grant.Rmd
deleted file mode 100644
index 0c49a7c..0000000
--- a/03_bulk_download_read_in/R Scripts/g_us_term_of_grant.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for us_term_of_grant data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_us_term_of_grant.tsv.zip", "g_us_term_of_grant.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_uspc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/g_uspc_at_issue.Rmd
deleted file mode 100644
index 20c93ef..0000000
--- a/03_bulk_download_read_in/R Scripts/g_uspc_at_issue.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for uspc data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_uspc_at_issue.tsv.zip", "g_uspc_at_issue.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/g_wipo_technology.Rmd b/03_bulk_download_read_in/R Scripts/g_wipo_technology.Rmd
deleted file mode 100644
index 3cd56a5..0000000
--- a/03_bulk_download_read_in/R Scripts/g_wipo_technology.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for wipo data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("g_wipo_technology.tsv.zip", "g_wipo_technology.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_applicant_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_applicant_not_disambiguated.Rmd
deleted file mode 100644
index a1c6c4b..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_applicant_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for subclass data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_applicant_not_disambiguated.tsv.zip", "pg_applicant_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_assignee_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_assignee_disambiguated.Rmd
deleted file mode 100644
index f3f2776..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_assignee_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for assignee data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_assignee_disambiguated.tsv.zip", "pg_assignee_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_assignee_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_assignee_not_disambiguated.Rmd
deleted file mode 100644
index 74cff63..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_assignee_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw assignee data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_assignee_not_disambiguated.tsv.zip", "pg_assignee_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_cpc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/pg_cpc_at_issue.Rmd
deleted file mode 100644
index ade632b..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_cpc_at_issue.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_current data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_cpc_at_issue.tsv.zip", "pg_cpc_at_issue.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_cpc_current.Rmd b/03_bulk_download_read_in/R Scripts/pg_cpc_current.Rmd
deleted file mode 100644
index cc8e732..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_cpc_current.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_current data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_cpc_current.tsv.zip", "pg_cpc_current.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_cpc_title.Rmd b/03_bulk_download_read_in/R Scripts/pg_cpc_title.Rmd
deleted file mode 100644
index a075adc..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_cpc_title.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for cpc_group data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_cpc_title.tsv.zip", "pg_cpc_title.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_foreign_priority.Rmd b/03_bulk_download_read_in/R Scripts/pg_foreign_priority.Rmd
deleted file mode 100644
index 793032b..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_foreign_priority.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for foreign_priority data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_foreign_priority.tsv.zip", "pg_foreign_priority.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_gov_interest.Rmd b/03_bulk_download_read_in/R Scripts/pg_gov_interest.Rmd
deleted file mode 100644
index 76d86bc..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_gov_interest.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for government_interest data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_gov_interest.tsv.zip", "pg_gov_interest.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_gov_interest_contracts.Rmd b/03_bulk_download_read_in/R Scripts/pg_gov_interest_contracts.Rmd
deleted file mode 100644
index ee67327..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_gov_interest_contracts.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent_contractawardnumber data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_gov_interest_contracts.tsv.zip", "pg_gov_interest_contracts.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_gov_interest_org.Rmd b/03_bulk_download_read_in/R Scripts/pg_gov_interest_org.Rmd
deleted file mode 100644
index 0a6260c..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_gov_interest_org.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for government_interest data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_gov_interest_org.tsv.zip", "pg_gov_interest_org.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_granted_pgpubs_crosswalk.Rmd b/03_bulk_download_read_in/R Scripts/pg_granted_pgpubs_crosswalk.Rmd
deleted file mode 100644
index 5e996bb..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_granted_pgpubs_crosswalk.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for patent data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_granted_pgpubs_crosswalk.tsv.zip", "pg_granted_pgpubs_crosswalk.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_inventor_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_inventor_disambiguated.Rmd
deleted file mode 100644
index 22434e3..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_inventor_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_inventor_disambiguated.tsv.zip", "pg_inventor_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_inventor_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_inventor_not_disambiguated.Rmd
deleted file mode 100644
index a88fe1e..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_inventor_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw inventor data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_inventor_not_disambiguated.tsv.zip", "pg_inventor_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_ipc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/pg_ipc_at_issue.Rmd
deleted file mode 100644
index 9fabdd0..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_ipc_at_issue.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for ipcr data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_ipc_at_issue.tsv.zip", "pg_ipc_at_issue.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_location_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_location_disambiguated.Rmd
deleted file mode 100644
index d9fc6fa..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_location_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for location data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_location_disambiguated.tsv.zip", "pg_location_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_location_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_location_not_disambiguated.Rmd
deleted file mode 100644
index 8b931b2..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_location_not_disambiguated.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for raw location data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_location_not_disambiguated.tsv.zip", "pg_location_not_disambiguated.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_pct_data.Rmd b/03_bulk_download_read_in/R Scripts/pg_pct_data.Rmd
deleted file mode 100644
index 27e684d..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_pct_data.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for pct_data data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_pct_data.tsv.zip", "pg_pct_data.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_published_application.Rmd b/03_bulk_download_read_in/R Scripts/pg_published_application.Rmd
deleted file mode 100644
index 8dfb717..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_published_application.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for us_term_of_grant data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_published_application.tsv.zip", "pg_published_application.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_rel_app_text.Rmd b/03_bulk_download_read_in/R Scripts/pg_rel_app_text.Rmd
deleted file mode 100644
index 1f79271..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_rel_app_text.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for rel_app_text data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_rel_app_text.tsv.zip", "pg_rel_app_text.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_uspc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/pg_uspc_at_issue.Rmd
deleted file mode 100644
index 11aebbd..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_uspc_at_issue.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for uspc data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_uspc_at_issue.tsv.zip", "pg_uspc_at_issue.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/R Scripts/pg_wipo_technology.Rmd b/03_bulk_download_read_in/R Scripts/pg_wipo_technology.Rmd
deleted file mode 100644
index 7a97372..0000000
--- a/03_bulk_download_read_in/R Scripts/pg_wipo_technology.Rmd
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: "Read-in script and summary information for wipo data"
-output: html_document
----
-
-```{r}
-library(data.table)
-library(vroom)
-```
-
-## Load patent file
-Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads")
-```{r}
-setwd("")
-data <- vroom::vroom(unz("pg_wipo_technology.tsv.zip", "pg_wipo_technology.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables.
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/03_bulk_download_read_in/README.md b/03_bulk_download_read_in/README.md
deleted file mode 100644
index 7eb7109..0000000
--- a/03_bulk_download_read_in/README.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# PatentsView-Code-Snippets
-
-# Bulk Download Files: Read-in Scripts
-
-Due to changes in the structure of the Bulk Download files, the PatentsView team has created template scripts in Python and R which demonstrate how to read in these tsv files.
-
-See the file format settings below:
-| Table | File(s) | Data Contains Line Break | Field Separator | Quote Settings | Quote Character |
-|------------------|-------------------------------|--------------------------|-----------------|---------------------------|-----------------|
-| claims | Yearly files from 1976 - 2000 | Yes | \t | Non Numeric Fields Quoted | " |
-| claims | 2001 data file | No | \t | Non Numeric Fields Quoted | " |
-| claims | Yearly files from 2002 - 2020 | Yes | \t | Non Numeric Fields Quoted | " |
-| brf_sum_text | Yearly files 1976 - 2020 | Yes | \t | Non Numeric Fields Quoted | " |
-| detail_desc_text | Yearly files from 1976 - 2000 | Yes | \t | Non Numeric Fields Quoted | " |
-| detail_desc_text | Yearly files from 2001 - 2004 | No | \t | Non Numeric Fields Quoted | unquoted |
-| detail_desc_text | Yearly files from 2005 - 2000 | Yes | \t | Non Numeric Fields Quoted | " |
-| draw_desc_text | Yearly files from 1976 - 2020 | Yes | \t | Non Numeric Fields Quoted | " |
-| all other tables | Single bulk file | No | \t | Non Numeric Fields Quoted | " |
diff --git a/04_bulk_pregrant_read_in/Python Scripts/application.py b/04_bulk_pregrant_read_in/Python Scripts/application.py
deleted file mode 100644
index a989714..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/application.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for Number of figures and sheets
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "application.tsv.zip"
-f_name = "application.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2005.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2005.py
deleted file mode 100644
index 2122e34..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2005.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2005 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2005.tsv.zip"
-f_name = "brf_sum_text_2005.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2006.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2006.py
deleted file mode 100644
index 625df16..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2006.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2006 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2006.tsv.zip"
-f_name = "brf_sum_text_2006.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2007.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2007.py
deleted file mode 100644
index 2738591..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2007.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2007 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2007.tsv.zip"
-f_name = "brf_sum_text_2007.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2008.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2008.py
deleted file mode 100644
index 9ba7e9e..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2008.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2008 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2008.tsv.zip"
-f_name = "brf_sum_text_2008.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2009.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2009.py
deleted file mode 100644
index e3dbeb3..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2009.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2009 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2009.tsv.zip"
-f_name = "brf_sum_text_2009.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2010.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2010.py
deleted file mode 100644
index ee0865c..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2010.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2010 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2010.tsv.zip"
-f_name = "brf_sum_text_2010.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2011.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2011.py
deleted file mode 100644
index 3096f26..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2011.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2011 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2011.tsv.zip"
-f_name = "brf_sum_text_2011.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2012.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2012.py
deleted file mode 100644
index 25b1c2c..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2012.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2012 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2012.tsv.zip"
-f_name = "brf_sum_text_2012.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2013.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2013.py
deleted file mode 100644
index 922f81e..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2013.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2013 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2013.tsv.zip"
-f_name = "brf_sum_text_2013.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2014.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2014.py
deleted file mode 100644
index 0bd0dcf..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2014.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2014 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2014.tsv.zip"
-f_name = "brf_sum_text_2014.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2015.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2015.py
deleted file mode 100644
index b3baac2..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2015.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2015 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2015.tsv.zip"
-f_name = "brf_sum_text_2015.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2016.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2016.py
deleted file mode 100644
index 76124db..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2016.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2016 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2016.tsv.zip"
-f_name = "brf_sum_text_2016.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2017.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2017.py
deleted file mode 100644
index db986e4..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2017.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2017 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2017.tsv.zip"
-f_name = "brf_sum_text_2017.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2018.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2018.py
deleted file mode 100644
index 04c5288..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2018.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2018 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2018.tsv.zip"
-f_name = "brf_sum_text_2018.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2019.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2019.py
deleted file mode 100644
index c1c4528..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2019.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2019 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2019.tsv.zip"
-f_name = "brf_sum_text_2019.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2020.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2020.py
deleted file mode 100644
index 884dfc3..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2020.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2020 brief summary text
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "brf_sum_text_2020.tsv.zip"
-f_name = "brf_sum_text_2020.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2005.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2005.py
deleted file mode 100644
index 65c5bb2..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2005.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2005 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2005.tsv.zip"
-f_name = "claim_2005.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2006.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2006.py
deleted file mode 100644
index 50ca11b..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2006.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2006 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2006.tsv.zip"
-f_name = "claim_2006.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2007.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2007.py
deleted file mode 100644
index 9043d72..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2007.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2007 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2007.tsv.zip"
-f_name = "claim_2007.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2008.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2008.py
deleted file mode 100644
index 2db9a33..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2008.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2008 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2008.tsv.zip"
-f_name = "claim_2008.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2009.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2009.py
deleted file mode 100644
index fc5a34b..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2009.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2009 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2009.tsv.zip"
-f_name = "claim_2009.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2010.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2010.py
deleted file mode 100644
index c0a513b..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2010.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2010 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2010.tsv.zip"
-f_name = "claim_2010.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2011.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2011.py
deleted file mode 100644
index fa1adc2..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2011.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2011 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2011.tsv.zip"
-f_name = "claim_2011.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2012.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2012.py
deleted file mode 100644
index 573f708..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2012.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2012 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2012.tsv.zip"
-f_name = "claim_2012.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2013.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2013.py
deleted file mode 100644
index 93a693e..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2013.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2013 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2013.tsv.zip"
-f_name = "claim_2013.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2014.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2014.py
deleted file mode 100644
index ec148e4..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2014.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2014 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2014.tsv.zip"
-f_name = "claim_2014.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2015.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2015.py
deleted file mode 100644
index 53b90e2..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2015.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2015 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2015.tsv.zip"
-f_name = "claim_2015.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2016.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2016.py
deleted file mode 100644
index 17bff16..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2016.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2016 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2016.tsv.zip"
-f_name = "claim_2016.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2017.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2017.py
deleted file mode 100644
index 3d6db13..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2017.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2017 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2017.tsv.zip"
-f_name = "claim_2017.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2018.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2018.py
deleted file mode 100644
index dbbe2c5..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2018.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2018 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2018.tsv.zip"
-f_name = "claim_2018.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2019.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2019.py
deleted file mode 100644
index 8864b6d..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2019.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2019 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2019.tsv.zip"
-f_name = "claim_2019.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2020.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2020.py
deleted file mode 100644
index 12a4130..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/claim_2020.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for 2020 Claims Data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "claim_2020.tsv.zip"
-f_name = "claim_2020.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/cpc.py b/04_bulk_pregrant_read_in/Python Scripts/cpc.py
deleted file mode 100644
index 138c5f6..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/cpc.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for Number of figures and sheets
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "cpc.tsv.zip"
-f_name = "cpc.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/cpc_current.py b/04_bulk_pregrant_read_in/Python Scripts/cpc_current.py
deleted file mode 100644
index 9cc1ce7..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/cpc_current.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for Number of figures and sheets
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "cpc_current.tsv.zip"
-f_name = "cpc_current.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2005.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2005.py
deleted file mode 100644
index 8f6b1a0..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2005.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2005 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2005.tsv.zip"
-f_name = "detail_desc_text_2005.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2006.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2006.py
deleted file mode 100644
index 6764318..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2006.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2006 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2006.tsv.zip"
-f_name = "detail_desc_text_2006.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2007.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2007.py
deleted file mode 100644
index 376f055..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2007.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2007 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2007.tsv.zip"
-f_name = "detail_desc_text_2007.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2008.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2008.py
deleted file mode 100644
index a8096ab..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2008.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2008 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2008.tsv.zip"
-f_name = "detail_desc_text_2008.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2009.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2009.py
deleted file mode 100644
index ac391af..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2009.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2009 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2009.tsv.zip"
-f_name = "detail_desc_text_2009.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2010.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2010.py
deleted file mode 100644
index 5b6bd0f..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2010.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2010 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2010.tsv.zip"
-f_name = "detail_desc_text_2010.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2011.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2011.py
deleted file mode 100644
index 5fc7d0d..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2011.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2011 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2011.tsv.zip"
-f_name = "detail_desc_text_2011.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2012.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2012.py
deleted file mode 100644
index 6ec0e29..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2012.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2012 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2012.tsv.zip"
-f_name = "detail_desc_text_2012.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2013.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2013.py
deleted file mode 100644
index 967ff38..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2013.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2013 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2013.tsv.zip"
-f_name = "detail_desc_text_2013.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2014.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2014.py
deleted file mode 100644
index e43aa33..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2014.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2014 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2014.tsv.zip"
-f_name = "detail_desc_text_2014.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2015.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2015.py
deleted file mode 100644
index 8857199..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2015.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2015 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2015.tsv.zip"
-f_name = "detail_desc_text_2015.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2016.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2016.py
deleted file mode 100644
index c536518..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2016.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2016 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2016.tsv.zip"
-f_name = "detail_desc_text_2016.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2017.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2017.py
deleted file mode 100644
index 9a4d649..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2017.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2017 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2017.tsv.zip"
-f_name = "detail_desc_text_2017.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2018.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2018.py
deleted file mode 100644
index 1736c47..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2018.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2018 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2018.tsv.zip"
-f_name = "detail_desc_text_2018.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2019.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2019.py
deleted file mode 100644
index a9a56b6..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2019.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2019 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2019.tsv.zip"
-f_name = "detail_desc_text_2019.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2020.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2020.py
deleted file mode 100644
index fc7047a..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2020.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2020 detail_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "detail_desc_text_2020.tsv.zip"
-f_name = "detail_desc_text_2020.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2007.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2007.py
deleted file mode 100644
index df58059..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2007.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2007 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2007.tsv.zip"
-f_name = "draw_desc_text_2007.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2008.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2008.py
deleted file mode 100644
index c355da0..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2008.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2008 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2008.tsv.zip"
-f_name = "draw_desc_text_2008.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2009.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2009.py
deleted file mode 100644
index 94b1d72..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2009.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2009 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2009.tsv.zip"
-f_name = "draw_desc_text_2009.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2010.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2010.py
deleted file mode 100644
index e26c796..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2010.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#Read-in script for 2010 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2010.tsv.zip"
-f_name = "draw_desc_text_2010.tsv"
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2011.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2011.py
deleted file mode 100644
index 8230ede..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2011.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2011 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2011.tsv.zip"
-f_name = "draw_desc_text_2011.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2012.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2012.py
deleted file mode 100644
index 8753c91..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2012.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2012 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2012.tsv.zip"
-f_name = "draw_desc_text_2012.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2013.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2013.py
deleted file mode 100644
index 4b98a6d..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2013.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2013 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2013.tsv.zip"
-f_name = "draw_desc_text_2013.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2014.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2014.py
deleted file mode 100644
index 7c25464..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2014.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2014 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2014.tsv.zip"
-f_name = "draw_desc_text_2014.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2015.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2015.py
deleted file mode 100644
index 08e96fa..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2015.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2015 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2015.tsv.zip"
-f_name = "draw_desc_text_2015.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2016.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2016.py
deleted file mode 100644
index aa571ee..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2016.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2016 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2016.tsv.zip"
-f_name = "draw_desc_text_2016.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2017.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2017.py
deleted file mode 100644
index d1555b7..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2017.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2017 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2017.tsv.zip"
-f_name = "draw_desc_text_2017.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2018.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2018.py
deleted file mode 100644
index 5e64113..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2018.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2018 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2018.tsv.zip"
-f_name = "draw_desc_text_2018.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2019.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2019.py
deleted file mode 100644
index 03fb5ed..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2019.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2019 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2019.tsv.zip"
-f_name = "draw_desc_text_2019.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2020.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2020.py
deleted file mode 100644
index 79bada2..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2020.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for 2020 draw_desc_text Data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "draw_desc_text_2020.tsv.zip"
-f_name = "draw_desc_text_2020.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/foreign_priority.py b/04_bulk_pregrant_read_in/Python Scripts/foreign_priority.py
deleted file mode 100644
index d1bf3b2..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/foreign_priority.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#Read-in script for Foreign priority data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "foreign_priority.tsv.zip"
-f_name = "foreign_priority.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/granted_patent_crosswalk.py b/04_bulk_pregrant_read_in/Python Scripts/granted_patent_crosswalk.py
deleted file mode 100644
index b5ea263..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/granted_patent_crosswalk.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#Read-in script for granted patent crosswalk data
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "granted_patent_crosswalk.tsv.zip"
-f_name = "_temp_patent_crosswalk.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/ipcr.py b/04_bulk_pregrant_read_in/Python Scripts/ipcr.py
deleted file mode 100644
index e837cf1..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/ipcr.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#Read-in script for International Patent Classification data for all patents (as of publication date)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "ipcr.tsv.zip"
-f_name = "ipcr.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/pct_data.py b/04_bulk_pregrant_read_in/Python Scripts/pct_data.py
deleted file mode 100644
index e01ebf6..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/pct_data.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for PCT data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "pct_data.tsv.zip"
-f_name = "pct_data.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/publication.py b/04_bulk_pregrant_read_in/Python Scripts/publication.py
deleted file mode 100644
index 45cafb7..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/publication.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#Read-in script for Publication data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-pd.set_option('display.max_columns', None)
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-file_name = "publication.tsv.zip"
-f_name = "publication.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/rawassignee.py b/04_bulk_pregrant_read_in/Python Scripts/rawassignee.py
deleted file mode 100644
index 105cdf3..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/rawassignee.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for Raw inventor information as it appears in the source text and XML files
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "rawinventor.tsv.zip"
-f_name = "rawinventor.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/rawinventor.py b/04_bulk_pregrant_read_in/Python Scripts/rawinventor.py
deleted file mode 100644
index d7ffd5e..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/rawinventor.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for Raw assignee information as it appears in the source text and XML files
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# Selecting the zip file.
-file_name = "rawassignee.tsv.zip"
-f_name = "rawassignee.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/rel_app_text.py b/04_bulk_pregrant_read_in/Python Scripts/rel_app_text.py
deleted file mode 100644
index bb205e7..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/rel_app_text.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#Read-in script for rel_app_text data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "rel_app_text.tsv.zip"
-f_name = "rel_app_text.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/us_parties.py b/04_bulk_pregrant_read_in/Python Scripts/us_parties.py
deleted file mode 100644
index 836754d..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/us_parties.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#Read-in script for us_parties data
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-import numpy as np
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-file_name = "us_parties.tsv.zip"
-f_name = "us_parties.tsv"
-# Selecting the zip file.
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/us_rel_doc.py b/04_bulk_pregrant_read_in/Python Scripts/us_rel_doc.py
deleted file mode 100644
index 7af943a..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/us_rel_doc.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for U.S. related documents (post-2005 patents only)
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "usreldoc.tsv.zip"
-f_name = "usreldoc.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/Python Scripts/uspc.py b/04_bulk_pregrant_read_in/Python Scripts/uspc.py
deleted file mode 100644
index 935d211..0000000
--- a/04_bulk_pregrant_read_in/Python Scripts/uspc.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#Read-in script for USPC classification data for all patents
-
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-# Selecting the zip file.
-file_name = "uspc.tsv.zip"
-f_name = "uspc.tsv"
-zf = zip.ZipFile(file_name)
-# Reading the selected file in the zip.
-chunksize = 10 ** 4
-count = 1
-n_obs = 0
-final = []
-for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC):
- print('processing chunk: ' + str(count))
- n_obs += len(df)
- count += 1
- final.append(df)
-# Create data frame with all observations
-df = pd.concat(final)
-# Print summary of data: number of observations, columns, and each variable data type
-print(n_obs)
-print(df.dtypes)
\ No newline at end of file
diff --git a/04_bulk_pregrant_read_in/R Scripts/application.Rmd b/04_bulk_pregrant_read_in/R Scripts/application.Rmd
deleted file mode 100644
index 5124fa7..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/application.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "application"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-setwd("C:\\Users\\jtutor\\Downloads")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("application.tsv.zip", "application.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text.Rmd
deleted file mode 100644
index 43818c2..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text.tsv.zip", "brf_sum_text.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2005.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2005.Rmd
deleted file mode 100644
index 3f3aa9d..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2005.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2005"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2005.tsv.zip", "brf_sum_text_2005.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2006.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2006.Rmd
deleted file mode 100644
index 6314172..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2006.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2006"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2006.tsv.zip", "brf_sum_text_2006.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2007.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2007.Rmd
deleted file mode 100644
index 944e926..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2007.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2007"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2007.tsv.zip", "brf_sum_text_2007.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2008.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2008.Rmd
deleted file mode 100644
index f3c6510..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2008.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2008"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2008.tsv.zip", "brf_sum_text_2008.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2009.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2009.Rmd
deleted file mode 100644
index b321fb3..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2009.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2009"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2009.tsv.zip", "brf_sum_text_2009.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2010.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2010.Rmd
deleted file mode 100644
index abaace3..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2010.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2010"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2010.tsv.zip", "brf_sum_text_2010.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2011.Rmd
deleted file mode 100644
index 597e2c3..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2011.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2011"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2011.tsv.zip", "brf_sum_text_2011.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2012.Rmd
deleted file mode 100644
index c3017b2..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2012.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2012"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2012.tsv.zip", "brf_sum_text_2012.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2013.Rmd
deleted file mode 100644
index 4547c7f..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2013.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2013"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2013.tsv.zip", "brf_sum_text_2013.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2014.Rmd
deleted file mode 100644
index 56de16c..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2014.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2014"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2014.tsv.zip", "brf_sum_text_2014.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2015.Rmd
deleted file mode 100644
index 014b211..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2015.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2015"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2015.tsv.zip", "brf_sum_text_2015.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2016.Rmd
deleted file mode 100644
index 01eab52..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2016.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2016"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2016.tsv.zip", "brf_sum_text_2016.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2017.Rmd
deleted file mode 100644
index d817d86..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2017.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2017"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2017.tsv.zip", "brf_sum_text_2017.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2018.Rmd
deleted file mode 100644
index 0540662..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2018.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2018"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2018.tsv.zip", "brf_sum_text_2018.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2019.Rmd
deleted file mode 100644
index e87d77f..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2019.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2019"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2019.tsv.zip", "brf_sum_text_2019.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2020.Rmd
deleted file mode 100644
index b1eb842..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2020.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "brf_sum_text_2020"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("brf_sum_text_2020.tsv.zip", "brf_sum_text_2020.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2005.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2005.Rmd
deleted file mode 100644
index b2d6a2e..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2005.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2005"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2005.tsv.zip", "claim_2005.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2006.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2006.Rmd
deleted file mode 100644
index cb2acd4..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2006.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2006"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2006.tsv.zip", "claim_2006.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2007.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2007.Rmd
deleted file mode 100644
index ed0c355..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2007.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2007"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2007.tsv.zip", "claim_2007.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2008.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2008.Rmd
deleted file mode 100644
index 8f466c4..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2008.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2008"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2008.tsv.zip", "claim_2008.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2009.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2009.Rmd
deleted file mode 100644
index ad7da30..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2009.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2009"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2009.tsv.zip", "claim_2009.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2010.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2010.Rmd
deleted file mode 100644
index 9fec56c..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2010.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2010"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2010.tsv.zip", "claim_2010.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2011.Rmd
deleted file mode 100644
index 9058700..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2011.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2011"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2011.tsv.zip", "claim_2011.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2012.Rmd
deleted file mode 100644
index 3998537..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2012.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2012"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2012.tsv.zip", "claim_2012.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2013.Rmd
deleted file mode 100644
index d4753ca..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2013.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2013"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2013.tsv.zip", "claim_2013.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2014.Rmd
deleted file mode 100644
index 710b25e..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2014.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2014"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2014.tsv.zip", "claim_2014.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2015.Rmd
deleted file mode 100644
index a8af866..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2015.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2015"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2015.tsv.zip", "claim_2015.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2016.Rmd
deleted file mode 100644
index 4ebb221..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2016.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2016"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2016.tsv.zip", "claim_2016.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2017.Rmd
deleted file mode 100644
index 6266834..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2017.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2017"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2017.tsv.zip", "claim_2017.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2018.Rmd
deleted file mode 100644
index d9a5243..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2018.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2018"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2018.tsv.zip", "claim_2018.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2019.Rmd
deleted file mode 100644
index 97a1cb1..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2019.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2019"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2019.tsv.zip", "claim_2019.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2020.Rmd
deleted file mode 100644
index 18112d1..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/claim_2020.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "claim_2020"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("claim_2020.tsv.zip", "claim_2020.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/cpc.Rmd b/04_bulk_pregrant_read_in/R Scripts/cpc.Rmd
deleted file mode 100644
index 20aeffe..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/cpc.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "cpc"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("cpc.tsv.zip", "cpc.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/cpc_current.Rmd b/04_bulk_pregrant_read_in/R Scripts/cpc_current.Rmd
deleted file mode 100644
index b38497d..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/cpc_current.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "cpc_current"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("cpc_current.tsv.zip", "cpc_current.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2011.Rmd
deleted file mode 100644
index 1bf4f97..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2011.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2011"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2011.tsv.zip", "detail_desc_text_2011.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2012.Rmd
deleted file mode 100644
index 2ca0213..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2012.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2012"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2012.tsv.zip", "detail_desc_text_2012.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2013.Rmd
deleted file mode 100644
index 9a9525d..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2013.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2013"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2013.tsv.zip", "detail_desc_text_2013.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2014.Rmd
deleted file mode 100644
index 4d36b7c..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2014.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2014"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2014.tsv.zip", "detail_desc_text_2014.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2015.Rmd
deleted file mode 100644
index c54b9b4..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2015.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2015"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2015.tsv.zip", "detail_desc_text_2015.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2016.Rmd
deleted file mode 100644
index 26d79a9..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2016.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2016"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2016.tsv.zip", "detail_desc_text_2016.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2017.Rmd
deleted file mode 100644
index 83533b4..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2017.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2017"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2017.tsv.zip", "detail_desc_text_2017.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2018.Rmd
deleted file mode 100644
index 9629451..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2018.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2018"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2018.tsv.zip", "detail_desc_text_2018.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2019.Rmd
deleted file mode 100644
index 77b7985..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2019.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2019"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2019.tsv.zip", "detail_desc_text_2019.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2020.Rmd
deleted file mode 100644
index 50713f5..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2020.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "detail_desc_text_2020"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("detail_desc_text_2020.tsv.zip", "detail_desc_text_2020.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2007.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2007.Rmd
deleted file mode 100644
index 73e2753..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2007.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2007"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2007.tsv.zip", "draw_desc_text_2007.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2008.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2008.Rmd
deleted file mode 100644
index 2d289da..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2008.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2008"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2008.tsv.zip", "draw_desc_text_2008.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2009.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2009.Rmd
deleted file mode 100644
index 642ae40..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2009.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2009"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2009.tsv.zip", "draw_desc_text_2009.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2010.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2010.Rmd
deleted file mode 100644
index 530ed21..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2010.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2010"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2010.tsv.zip", "draw_desc_text_2010.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2011.Rmd
deleted file mode 100644
index cf847fb..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2011.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2011"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2011.tsv.zip", "draw_desc_text_2011.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2012.Rmd
deleted file mode 100644
index 87b9058..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2012.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2012"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2012.tsv.zip", "draw_desc_text_2012.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2013.Rmd
deleted file mode 100644
index 452e686..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2013.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2013"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2013.tsv.zip", "draw_desc_text_2013.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2014.Rmd
deleted file mode 100644
index 4a86c94..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2014.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2014"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2014.tsv.zip", "draw_desc_text_2014.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2015.Rmd
deleted file mode 100644
index 1e8e987..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2015.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2015"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2015.tsv.zip", "draw_desc_text_2015.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2016.Rmd
deleted file mode 100644
index 3de0940..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2016.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2016"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2016.tsv.zip", "draw_desc_text_2016.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2017.Rmd
deleted file mode 100644
index 983cef6..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2017.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2017"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2017.tsv.zip", "draw_desc_text_2017.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2018.Rmd
deleted file mode 100644
index bd9ea9a..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2018.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2018"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2018.tsv.zip", "draw_desc_text_2018.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2019.Rmd
deleted file mode 100644
index 5267a9a..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2019.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2019"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2019.tsv.zip", "draw_desc_text_2019.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2020.Rmd
deleted file mode 100644
index 04db8c4..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2020.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "draw_desc_text_2020"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("draw_desc_text_2020.tsv.zip", "draw_desc_text_2020.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/foreign_priority.Rmd b/04_bulk_pregrant_read_in/R Scripts/foreign_priority.Rmd
deleted file mode 100644
index 803b253..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/foreign_priority.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "foreign_priority"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("foreign_priority.tsv.zip", "foreign_priority.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/granted_patent_crosswalk.Rmd b/04_bulk_pregrant_read_in/R Scripts/granted_patent_crosswalk.Rmd
deleted file mode 100644
index 091b9fb..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/granted_patent_crosswalk.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "granted_patent_crosswalk"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("granted_patent_crosswalk.tsv.zip", "_temp_patent_crosswalk.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/ipcr.Rmd b/04_bulk_pregrant_read_in/R Scripts/ipcr.Rmd
deleted file mode 100644
index 57990c9..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/ipcr.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "ipcr"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("ipcr.tsv.zip", "ipcr.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/pct_data.Rmd b/04_bulk_pregrant_read_in/R Scripts/pct_data.Rmd
deleted file mode 100644
index e95f4fc..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/pct_data.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "pct_data"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("pct_data.tsv.zip", "pct_data.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/publication.Rmd b/04_bulk_pregrant_read_in/R Scripts/publication.Rmd
deleted file mode 100644
index edddf57..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/publication.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "publication"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("publication.tsv.zip", "publication.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/rawassignee.Rmd b/04_bulk_pregrant_read_in/R Scripts/rawassignee.Rmd
deleted file mode 100644
index 5969bad..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/rawassignee.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "rawassignee"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("rawassignee.tsv.zip", "rawassignee.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/rawinventor.Rmd b/04_bulk_pregrant_read_in/R Scripts/rawinventor.Rmd
deleted file mode 100644
index bf66b04..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/rawinventor.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "rawinventor"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("rawinventor.tsv.zip", "rawinventor.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/rel_app_text.Rmd b/04_bulk_pregrant_read_in/R Scripts/rel_app_text.Rmd
deleted file mode 100644
index e399cb6..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/rel_app_text.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "rel_app_text"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("rel_app_text.tsv.zip", "rel_app_text.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/us_parties.Rmd b/04_bulk_pregrant_read_in/R Scripts/us_parties.Rmd
deleted file mode 100644
index 2d4c8a7..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/us_parties.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "us_parties"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("us_parties.tsv.zip", "us_parties.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/uspc.Rmd b/04_bulk_pregrant_read_in/R Scripts/uspc.Rmd
deleted file mode 100644
index 07861cd..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/uspc.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "uspc"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("uspc.tsv.zip", "uspc.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/04_bulk_pregrant_read_in/R Scripts/usreldoc.Rmd b/04_bulk_pregrant_read_in/R Scripts/usreldoc.Rmd
deleted file mode 100644
index 91cb5f3..0000000
--- a/04_bulk_pregrant_read_in/R Scripts/usreldoc.Rmd
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: "usreldoc"
-author: "Evelyn"
-date: "9/25/2020"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-library(vroom)
-
-# set directory
-knitr::opts_knit$set(root.dir = "")
-```
-
-## 2. Load patent file
-```{r}
-data <- vroom::vroom(unz("usreldoc.tsv.zip", "usreldoc.tsv"),
- delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A"))
-```
-
-## 3. Get descriptive information
-```{r}
-head(data)
-ncol(data)
-nrow(data)
-str(data)
-summary(data, na.rm=TRUE)
-```
diff --git a/05_bulk_pregrant_joins/Python Scripts/join_application_publication.py b/05_bulk_pregrant_joins/Python Scripts/join_application_publication.py
deleted file mode 100644
index 20d1f5c..0000000
--- a/05_bulk_pregrant_joins/Python Scripts/join_application_publication.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Read-in script for joining the pre-granted application and publication tables
-# Importing necessary packages.
-import os
-import zipfile as zip
-import pandas as pd
-import csv
-
-# Set up file path:
-# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads")
-os.chdir("")
-
-# specify the name of the application zip file and the name you want to use when unzipped
-app_zip = "application.tsv.zip"
-app = "application.tsv"
-
-# specify the name of the publication zip file and the name you want to use when unzipped
-pub_zip = "publication.tsv.zip"
-pub = "publication.tsv"
-
-# Selecting the zip files
-zf_app = zip.ZipFile(app_zip)
-zf_pub = zip.ZipFile(pub_zip)
-
-# Read the data into dataframes
-df_app = pd.read_csv(zf_app.open(app), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-df_pub = pd.read_csv(zf_pub.open(pub), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC)
-
-# Rename columns which are the same across both files
-df_app = df_app.rename(columns={'id':'id_app', 'date':'date_app', 'country':'country_app'})
-df_pub = df_pub.rename(columns={'id':'id_pub', 'date':'date_pub', 'country':'country_pub'})
-
-# Merge the two dataframes together
-merged = df_pub.merge(df_app, how="inner", on='document_number')
-
-# print the first 5 columns and the length of the dataframe
-print(merged.head())
-print(len(merged))
\ No newline at end of file
diff --git a/05_bulk_pregrant_joins/R Scripts/join_application_publication.Rmd b/05_bulk_pregrant_joins/R Scripts/join_application_publication.Rmd
deleted file mode 100644
index 9e1fbda..0000000
--- a/05_bulk_pregrant_joins/R Scripts/join_application_publication.Rmd
+++ /dev/null
@@ -1,46 +0,0 @@
----
-title: "join application and publication"
-author: "Chris"
-date: "4/12/2021"
-output: html_document
----
-
-## 1. Set up
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-
-# load packages
-library(data.table)
-```
-
-## 2. Load application and publication files
-## The application file is too large to unzip in R so we recommend unzipping the files manually then running the code
-```{r}
-# set directory ex: setwd("/Users/username/Downloads")
-setwd("")
-
-# The application file may be too large to read into memory on some computers
-# If this is the case you can split the file into smaller pieces like so:
-# app <- fread("application.tsv", sep="\t", nrows = 100000, skip = 0)
-# where nrows is the number of rows to read from the file and skip is the number
-# of rows to skip from the start of the file
-
-app <- fread("application.tsv", sep="\t")
-pub <- fread("publication.tsv", sep="\t")
-```
-## 3. Rename columns that are the same across the two files
-```{r}
-colnames(app)[colnames(app) == 'id'] <- 'id_app'
-colnames(app)[colnames(app) == 'date'] <- 'date_app'
-colnames(app)[colnames(app) == 'country'] <- 'country_app'
-
-colnames(pub)[colnames(pub) == 'id'] <- 'id_pub'
-colnames(pub)[colnames(pub) == 'date'] <- 'date_pub'
-colnames(pub)[colnames(pub) == 'country'] <- 'country_pub'
-```
-
-
-## 3. Merge application and publication data using the document_number column
-```{r}
-merged <- merge(pub, app, by = "document_number")
-```
diff --git a/06_mysql_text_load_in/README.md b/06_mysql_text_load_in/README.md
deleted file mode 100644
index 07867c3..0000000
--- a/06_mysql_text_load_in/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-## Mysql Text Table Load-In Scripts
-
-the shell scripts in this folder are designed to load downloaded text table files into a user's own mysql database using a connection specified in the sql.conf file (template provided).
-the user must substitute in their own file path for the downloaded tsv files, and may need to change the year suffix for the file.
diff --git a/06_mysql_text_load_in/g_brf_sum_text.sh b/06_mysql_text_load_in/g_brf_sum_text.sh
deleted file mode 100644
index 7434c20..0000000
--- a/06_mysql_text_load_in/g_brf_sum_text.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_brf_sum_text_2022.tsv' INTO TABLE patent_text.brf_sum_text_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/g_claims.sh b/06_mysql_text_load_in/g_claims.sh
deleted file mode 100644
index 3855754..0000000
--- a/06_mysql_text_load_in/g_claims.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_claims_2022.tsv' INTO TABLE patent_text.claims_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/g_detail_desc_text.sh b/06_mysql_text_load_in/g_detail_desc_text.sh
deleted file mode 100644
index f94fea8..0000000
--- a/06_mysql_text_load_in/g_detail_desc_text.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_detail_desc_text_2022.tsv' INTO TABLE patent_text.detail_desc_text_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/g_draw_desc_text.sh b/06_mysql_text_load_in/g_draw_desc_text.sh
deleted file mode 100644
index 874ed3e..0000000
--- a/06_mysql_text_load_in/g_draw_desc_text.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_draw_desc_text_2022.tsv' INTO TABLE patent_text.draw_desc_text_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/pg_brf_sum_text.sh b/06_mysql_text_load_in/pg_brf_sum_text.sh
deleted file mode 100644
index 87a2bbe..0000000
--- a/06_mysql_text_load_in/pg_brf_sum_text.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_brf_sum_text_2022.tsv' INTO TABLE pgpubs_text.brf_sum_text_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/pg_claims.sh b/06_mysql_text_load_in/pg_claims.sh
deleted file mode 100644
index 1dd118f..0000000
--- a/06_mysql_text_load_in/pg_claims.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_claims_2022.tsv' INTO TABLE pgpubs_text.claims_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/pg_detail_desc_text.sh b/06_mysql_text_load_in/pg_detail_desc_text.sh
deleted file mode 100644
index b223430..0000000
--- a/06_mysql_text_load_in/pg_detail_desc_text.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_detail_desc_text_2022.tsv' INTO TABLE pgpubs_text.detail_desc_text_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/pg_draw_desc_text.sh b/06_mysql_text_load_in/pg_draw_desc_text.sh
deleted file mode 100644
index 032d097..0000000
--- a/06_mysql_text_load_in/pg_draw_desc_text.sh
+++ /dev/null
@@ -1 +0,0 @@
-mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_draw_desc_text_2022.tsv' INTO TABLE pgpubs_text.draw_desc_text_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
diff --git a/06_mysql_text_load_in/sql.conf b/06_mysql_text_load_in/sql.conf
deleted file mode 100644
index 2575d75..0000000
--- a/06_mysql_text_load_in/sql.conf
+++ /dev/null
@@ -1,5 +0,0 @@
-[client]
-host =
-user =
-password =
-port = 3306
diff --git a/07_PatentSearch_API_demo/README.md b/07_PatentSearch_API_demo/README.md
deleted file mode 100644
index 47cb16a..0000000
--- a/07_PatentSearch_API_demo/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# PatentSearch API demo
-
-The jupyter notebook in this folder demonstrates how to request and read data from the PatentsView PatentSearch API in Python.
-
-To use this notebok, users must have installed Python and the following Python packages and their dependnencies:
-* Jupyter
-* Pandas
-* Requests
-* JSON
\ No newline at end of file
diff --git a/07_Search_API_demo/PV Search API tutorial.ipynb b/07_Search_API_demo/PV Search API tutorial.ipynb
deleted file mode 100644
index e13d45e..0000000
--- a/07_Search_API_demo/PV Search API tutorial.ipynb
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# This file has moved\n",
- "## New Name, New Location\n",
- "PatentsView's Search API has become the PatentSearch API, and so the tutorial notebook has been renamed to match. \n",
- "You can find that resource at https://github.com/PatentsView/PatentsView-Code-Snippets/blob/master/07_PatentSearch_API_demo/PV%20PatentSearch%20API%20tutorial.ipynb \n",
- "\n",
- "Thank you for using PatentsView!"
- ]
- }
- ],
- "metadata": {
- "language_info": {
- "name": "python"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/07_Search_API_demo/README.md b/07_Search_API_demo/README.md
deleted file mode 100644
index 1207d01..0000000
--- a/07_Search_API_demo/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# This file has moved
-## New Name, New Location
-PatentsView's Search API has become the PatentSearch API, and so the tutorial notebook has been renamed to match.
-You can find that resource at https://github.com/PatentsView/PatentsView-Code-Snippets/blob/master/07_PatentSearch_API_demo/
-
-Thank you for using PatentsView!
\ No newline at end of file
diff --git a/07_PatentSearch_API_demo/PV PatentSearch API tutorial.ipynb b/PatentSearch/0-patentsearch-api-demo.ipynb
similarity index 100%
rename from 07_PatentSearch_API_demo/PV PatentSearch API tutorial.ipynb
rename to PatentSearch/0-patentsearch-api-demo.ipynb
diff --git a/07_PatentSearch_API_demo/notebook_images/detail_ex.png b/PatentSearch/notebook_images/detail_ex.png
similarity index 100%
rename from 07_PatentSearch_API_demo/notebook_images/detail_ex.png
rename to PatentSearch/notebook_images/detail_ex.png
diff --git a/07_PatentSearch_API_demo/notebook_images/schema_ex.png b/PatentSearch/notebook_images/schema_ex.png
similarity index 100%
rename from 07_PatentSearch_API_demo/notebook_images/schema_ex.png
rename to PatentSearch/notebook_images/schema_ex.png
diff --git a/README.md b/README.md
index 91011e9..d07fc4b 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,21 @@
-# PatentsView-Code-Snippets
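+[![Pytest](https://github.com/PatentsView/PatentsView-Code-Snippets/actions/workflows/pytest.yml/badge.svg)](https://github.com/PatentsView/PatentsView-Code-Snippets/actions/workflows/pytest.yml)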
-The code scripts in this repository are for general PatentsView users and serve a variety of purposes.
+# PatentsView Code Examples
-List of Resources:
+Examples working with [PatentsView's bulk data downloads](https://patentsview.org/download/data-download-tables) and [PatentsView's PatentSearch API](https://search.patentsview.org/docs/2024/11/06/2.2-release).
-| Folder | Description |
-| --- |--- |
-| 01_bulk_download_example_joins| *Provides code examples for joining bulk download files*|
-| 02_claims_examples| *Provides an example Jupyter Notebook demonstrating how to read claims data files in Python* |
-| 03_bulk_download_read_in| *Provides code examples for reading in bulk download files using R and Python*|
-| 04_bulk_pregrant_read_in| *Provides code examples for reading in bulk pregrant data download files using R and Python*|
-| 05_bulk_pregrant_joins| *Provides code examples for merging two tables with a shared key using R and Python*|
-| 06_mysql_text_load_in| *Provides code examples for reading bulk text data files into a user's own mysql database*|
-| 07_PatentSearch_API_demo| *Provides an example Jupyter Notebook demonstrating how to request and read data from the PatentsView PatentSearch API in Python*|
+## Examples List
+
+### Bulk Data Downloads
+- [Getting Started With PatentsView Data Downloads](data-downloads/0-getting-started.ipynb)
+
+### PatentSearch API
+- [PatentSearch API Demo](PatentSearch/0-patentsearch-api-demo.ipynb)
+
+## Dependencies
+
+Python and R dependencies are specified in the [`environment.yml`](environment.yml) file and in individual code example files. You can install all dependencies using [**conda**](https://docs.conda.io/projects/conda/en/latest/index.html):
+```sh
+conda env update
+conda activate pv-code-examples
+```
\ No newline at end of file
diff --git a/data-downloads/0-getting-started.ipynb b/data-downloads/0-getting-started.ipynb
new file mode 100644
index 0000000..8a14377
--- /dev/null
+++ b/data-downloads/0-getting-started.ipynb
@@ -0,0 +1,800 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Getting Started With PatentsView Data Downloads\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Table of contents**