diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 0000000..4baa8b8 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,19 @@ +name: Pytest + +on: + push: + pull_request: + +jobs: + test-pandas-read-in: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} + steps: + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 + with: + environment-file: environment.yml + activate-environment: pv-code-examples + - run: pytest \ No newline at end of file diff --git a/.gitignore b/.gitignore index b5f2b45..0f367fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ -.ipynb_checkpoints .DS_Store +*.tsv +*.zip +*.pyc +__pycache__ +.pytest_cache \ No newline at end of file diff --git a/01_bulk_download_example_joins/README.md b/01_bulk_download_example_joins/README.md deleted file mode 100644 index 671a002..0000000 --- a/01_bulk_download_example_joins/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# Bulk Download Files: Example Joins - -The code scripts in this folder walk through the joining of various bulk download files: - -| Script | Description | -| --- |--- | -| country_ipc_selection.R | *Demonstrates using ipc and location to select patents* | -| join_assignee.Rmd | *Describes joining of assignee, location, and patent tables*| -| join_assignee.html | *HTML version of join_assignee.Rmd*| -| join_inventor.Rmd | *Describes joining of inventor, location, and patent tables*| -| join_inventor.html | *HTML version of join_inventor.Rmd*| - -Note: - -You will need to set your working directory/folder path in the r setup block in the .Rmd scripts above. 
Here is the line you will need to change: - - knitr::opts_knit$set(root.dir = "TODO: ADD DIRECTORY PATH HERE") \ No newline at end of file diff --git a/01_bulk_download_example_joins/country_ipc_selection.R b/01_bulk_download_example_joins/country_ipc_selection.R deleted file mode 100755 index 964bf51..0000000 --- a/01_bulk_download_example_joins/country_ipc_selection.R +++ /dev/null @@ -1,77 +0,0 @@ -# imports -library(fastmatch) -library(dplyr) -library(tidyr) -library(stringr) - - -# download and unzip relevant files from bulk download site -# for additional file links, see site: https://patentsview.org/download/data-download-tables -ipc_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/ipcr.tsv.zip" -patent_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/patent.tsv.zip" -raw_location_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/rawlocation.tsv.zip" -other_applicant_table_url <- "https://s3.amazonaws.com/data.patentsview.org/download/non_inventor_applicant.tsv.zip" -pat_asgn_url <- "https://s3.amazonaws.com/data.patentsview.org/download/patent_assignee.tsv.zip" -pat_inv_url <- "https://s3.amazonaws.com/data.patentsview.org/download/patent_inventor.tsv.zip" - -dl_folder = "Path/To/My/Download/Folder" - -for (link in c(ipc_table_url, patent_table_url, raw_location_table_url, other_applicant_table_url, pat_asgn_url, pat_inv_url)) { - fnam = tail(str_split(link, '/')[[1]], 1) - download.file(link, paste0(dl_folder,'/',fnam), method = "curl") - unzip(paste0(dl_folder,'/',fnam), exdir = dl_folder) -} - -# faster %in% implementation to speed up repeated use below -# credit: https://stackoverflow.com/questions/32934933/faster-in-operator -`%fin%` <- function(x, table) { - stopifnot(require(fastmatch)) - fmatch(x, table, nomatch = 0L) > 0L -} - -#read in files and start filtering -#start with location and build from there -locs <- read.table(file = paste0(dl_folder,'/rawlocation.tsv'), header = T, sep = 
'\t') %>% - select(id, location_id, country) %>% #only need id and country columns - filter(country == "IN") # ISO alpha-2 code for India -# for other country codes, see https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes - -# next pick out the inventors, applicants, and assignees who have one of these location ids -invs_in_india <- read.table(file = paste0(dl_folder, '/patent_inventor.tsv'), header = T, sep = '\t', stringsAsFactors = F) %>% - filter(location_id %fin% locs$location_id) - -other_applics_in_india <- read.table(file = paste0(dl_folder, '/non_inventor_applicant.tsv'), header = T, sep = '\t', stringsAsFactors = F) %>% - select(patent_id, rawlocation_id) %>% - filter(rawlocation_id %fin% locs$id) - -asgns_in_india <- read.table(file = paste0(dl_folder, '/patent_assignee.tsv'), header = T, sep = '\t', stringsAsFactors = F) %>% - filter(location_id %fin% locs$location_id) - -#now we can combine these lists of patents and remove duplicates - -country_patlist <- c(invs_in_india$patent_id, asgns_in_india$patent_id, other_applics_in_india$patent_id) -country_patlist <- country_patlist[!duplicated(country_patlist)] - -# you can now optionally delete the above data frames to clear up some memory -rm(invs_in_india, other_applics_in_india, asgns_in_india) - -# next we'll filter these to the ones with desired IPC codes -good_ipc3 <- c('C07', 'C08', 'C12') -good_ipc4 <- c('A61K', 'A61P', 'C40B') - -final_patlist <- read.table(file = paste0(dl_folder,'/ipcr.tsv'), header = T, sep = '\t') %>% - select(patent_id, section, ipc_class, subclass) %>% - filter(patent_id %fin% country_patlist) %>% - mutate(ipc3 = paste0(section,ipc_class), ipc4 = paste0(section,ipc_class,subclass)) %>% - filter((ipc3 %fin% good_ipc3)|(ipc4 %fin% good_ipc4)) %>% - select(patent_id) - -# this should be a complete list of the patents that match your desired country and IPC codes -# from here you should be able to join any additional tables to get your desired full dataset -# e.g. 
- -mydata <- patlist %>% - merge(read.table(file = paste0(dl_folder, '/patent.tsv'), header = T, sep = '\t'), by.x=patent_id, by.y=id, all.x=T) - -# and export if desired: -write.csv(mydata, paste0(dl_folder,'/mydata.csv'), row.names = F) \ No newline at end of file diff --git a/01_bulk_download_example_joins/join_assignee.Rmd b/01_bulk_download_example_joins/join_assignee.Rmd deleted file mode 100644 index dc67996..0000000 --- a/01_bulk_download_example_joins/join_assignee.Rmd +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: "Merge Assignee, Location, and Patent Tables" -output: html_document ---- -#### Note: This HTML document was last generated on `r Sys.Date()` - - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -knitr::opts_knit$set(root.dir = "TODO: Add Directory Path Here") - -library(data.table) -library(scales) -``` - -## Load Patent Table -```{r, warning=FALSE } -patent <- fread("patent.tsv") -colnames(patent)[colnames(patent)=="type"] <- "patent_type" -colnames(patent)[colnames(patent)=="country"] <- "patent_country" -colnames(patent)[colnames(patent)=="id"] <- "patent_id" -head(patent) -``` - -## Load Assignee and Location Tables -```{r} -assignee <- fread("assignee.tsv") -location <- fread("location.tsv") - -``` -```{r} -head(assignee) -head(location) - -``` - -## Load Raw Location and Raw Assignee Tables -```{r, warning=FALSE} - -rawassignee <- fread("rawassignee.tsv") -rawlocation <- fread("rawlocation.tsv") - -``` -```{r} -head(rawassignee) -head(rawlocation) -``` - -## Join Raw Assignee and Assignee Tables -```{r} -merged_assignees <- merge(rawassignee, assignee, by.x = c('assignee_id', 'type', 'name_first', 'name_last', 'organization'), by.y = c('id','type', 'name_first', 'name_last', 'organization')) -``` - -```{r} -colnames(merged_assignees)[colnames(merged_assignees)=="type"] <- "assignee_type" - -head(merged_assignees) - -n_non_matches <- nrow(rawassignee) - nrow(merged_assignees) - -``` - -The assignee table contains `r 
comma_format()(nrow(assignee))` rows while the rawassignee table contains `r comma_format()(nrow(rawassignee))` rows. The merging of these tables results in a table with `r comma_format()(nrow(merged_assignees))` rows. Thus there are `r comma_format()(n_non_matches)` rows in the rawassignee table which do not contain matches to the assignee table on the specified columns. - -## Join Raw Location and Location Tables -```{r} -head(location) -``` - -```{r} -colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id" -head(rawlocation) -``` - - -```{r} -merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country')) -``` - - - -```{r} -head(merged_locations) - -n_non_matches <- nrow(rawlocation) - nrow(merged_locations) - -``` - - -The location table contains `r comma_format()(nrow(location))` rows while the rawlocation table contains `r comma_format()(nrow(rawlocation))`rows. Merging these tables results in a merged_locations table of `r comma_format()(nrow(merged_locations))` rows. Thus there are `r comma_format()(n_non_matches)` rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table. 
- -## Join Merged_Locations and Merged_Assignees -```{r} -merged_assignee_location <- merge(merged_locations, merged_assignees, by.x = 'rawlocation_id', by.y = 'rawlocation_id') -``` - -```{r} -colnames(merged_assignee_location)[colnames(merged_assignee_location)=="type"] <- "assignee_type" -head(merged_assignee_location) -``` - -## Join Assignee, Location, and Patent Data -```{r} - -head(patent) -``` - -```{r} -# free up some memory for final merge -rm(rawassignee) -rm(rawlocation) -rm(assignee) -rm(location) -rm(merged_locations) - -complete_merge <- merge(patent, merged_assignee_location, by.x = 'patent_id', by.y = 'patent_id' ) -``` - -```{r} -head(complete_merge) -``` - -The merged_assignee_location table contains `r comma_format()(nrow(merged_assignee_location))` rows and the patent table contains `r comma_format()(nrow(patent))` rows. The complete_merge contains `r comma_format()(nrow(complete_merge))` rows. Thus, all of the patent_ids in the merged_assignee_location table have matches in the patent table. - diff --git a/01_bulk_download_example_joins/join_assignee.html b/01_bulk_download_example_joins/join_assignee.html deleted file mode 100644 index a817e41..0000000 --- a/01_bulk_download_example_joins/join_assignee.html +++ /dev/null @@ -1,558 +0,0 @@ - - - - - - - - - - - - - -Merge Assignee, Location, and Patent Tables - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - -
-

Note: This HTML document was last generated on 2020-01-14

-
-
-

Load Patent Table

-
patent <- fread("patent.tsv")
-colnames(patent)[colnames(patent)=="type"] <- "patent_type"
-colnames(patent)[colnames(patent)=="country"] <- "patent_country"
-colnames(patent)[colnames(patent)=="id"] <- "patent_id"
-head(patent)
-
##    patent_id patent_type   number patent_country       date
-## 1:  10000000     utility 10000000             US 2018-06-19
-## 2:  10000001     utility 10000001             US 2018-06-19
-## 3:  10000002     utility 10000002             US 2018-06-19
-## 4:  10000003     utility 10000003             US 2018-06-19
-## 5:  10000004     utility 10000004             US 2018-06-19
-## 6:  10000005     utility 10000005             US 2018-06-19
-##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2:                                                                                                                                                                                     The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3:                                                                                                      The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4:                                                                                                                                                                                       The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5:                                                                                                                                                                                                                                                                                                                                                                                                                                                              The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6:                                                                                                        A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-##                                                                                                                                                                                                     title
-## 1:                                                                                                                                                  Coherent LADAR using intra-pixel quadrature detection
-## 2:                                                                                                                                            Injection molding machine and mold thickness control method
-## 3:                                                                                                                                             Method for manufacturing polymer film and co-extruded film
-## 4:                                                                                                                                                  Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6:                                                                                                                                           Article vacuum formation method and vacuum forming apparatus
-##    kind num_claims      filename withdrawn
-## 1:   B2         20 ipg180619.xml      NULL
-## 2:   B2         12 ipg180619.xml      NULL
-## 3:   B2          9 ipg180619.xml      NULL
-## 4:   B2         18 ipg180619.xml      NULL
-## 5:   B2          6 ipg180619.xml      NULL
-## 6:   B2          4 ipg180619.xml      NULL
-
-
-

Load Assignee and Location Tables

-
assignee <- fread("assignee.tsv")
-location <- fread("location.tsv")
-
head(assignee)
-
##                           id type name_first name_last
-## 1: 004bvflbsd8k0pjiga6l1hdm2    2       NULL      NULL
-## 2: 00aw1csuxll9yyqi67qq9m91q    2       NULL      NULL
-## 3: 00gkqk53hjao0mroltu3oo1qk    2       NULL      NULL
-## 4: 00grlqxhvjz0l7ylc5okvvtbg    2       NULL      NULL
-## 5: 00iquis3fn9yu6i0n8ojxex4c    2       NULL      NULL
-## 6: 00qivxg405hst268eg56psex6    2       NULL      NULL
-##                           organization
-## 1: GM Global Technology Operations LLC
-## 2: AT&T Intellectual Property II, L.P.
-## 3:                        McAfee, Inc.
-## 4:                   Sequa Corporation
-## 5:            CENTER CROSS ARCHERY LLC
-## 6:        Exceptional IP Holdings, LLC
-
head(location)
-
##              id           city state country latitude longitude county
-## 1: 000ii62xlraz        Esparto    CA      US  38.6922 -122.0160   Yolo
-## 2: 000iprvfzu2f           Limm  NULL      GB  11.1000   30.5167   NULL
-## 3: 000ktsnnq96r Lauenburg/Elbe  NULL      DE  53.3763   10.5497   NULL
-## 4: 000osf8n2ysw     Bouxwiller  NULL      FR  47.5051    7.3453   NULL
-## 5: 0014a0zfh259        Morance  NULL      FR  45.8978    4.7004   NULL
-## 6: 0018uf8e2x0f    Dals Langed  NULL      SE  58.9167   12.3000   NULL
-##    state_fips county_fips
-## 1:          6        6113
-## 2:       NULL        NULL
-## 3:       NULL        NULL
-## 4:       NULL        NULL
-## 5:       NULL        NULL
-## 6:       NULL        NULL
-
-
-

Load Raw Location and Raw Assignee Tables

-
rawassignee <- fread("rawassignee.tsv")
-rawlocation <- fread("rawlocation.tsv")
-
head(rawassignee)
-
##                         uuid patent_id              assignee_id
-## 1: 0000p94wkezw94s8cz7dbxlvz   5856666 org_fijoKOoRhIzrkYzecWF9
-## 2: 00013vk881wap9u4mbo7lwwhp   5204210 org_UrbE3xev7LUsnuvRjbep
-## 3: 000192sn2u10kzpikl4s7h3r0   5302149 org_JcXwBlJtb1uvcPKHeaYX
-## 4: 0001ycvv6sz1ju07ss99nhxi1   9104354 org_7fE5f5nnY6dbOc3vSaXb
-## 5: 0001z7ws4m14aqdb3tv99u550   6584517 org_sj7olrHxASyJDNVGczBe
-## 6: 0002ca4n6l0hop2dycg28wx3g   6970439 org_E6m1uPIsSbg4FlEl54lY
-##               rawlocation_id type name_first name_last
-## 1: orskbf54s58e97lkmw8na5rpx    2       NULL      NULL
-## 2: mue862v5lcjdhzqqk86ei75kj    2       NULL      NULL
-## 3: o1h9dqdv0yq7dt1b1vmrcal9h    3       NULL      NULL
-## 4: rspbpqcajvm09r1ew9mgnpx37    3       NULL      NULL
-## 5: l1gyelp5jcg0hakk9smmhsdgr    2       NULL      NULL
-## 6: 4s9aa5btsexwstspmj5ta5a6j    2       NULL      NULL
-##                                                  organization sequence
-## 1:                                   U.S. Philips Corporation        0
-## 2:                                          Xerox Corporation        0
-## 3: Commonwealth Scientific & Industrial Research Organization        1
-## 4:                                     Canon Kabushiki Kaisha        0
-## 5:                                Cypress Semiconductor Corp.        0
-## 6:                                             Motorola, Inc.        0
-
head(rawlocation)
-
##                           id  location_id        city state country
-## 1: 00008o6jw8bp4c824ihgc1fcz qxm7bum6wvip   Sunnyvale    CA      US
-## 2: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz       Seoul  NULL      KR
-## 3: 0000b606fnwnupiane3dfkoyl lmeg78jbn8nq Baton Rouge    LA      US
-## 4: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z       Osaka  NULL      JP
-## 5: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w      Austin    TX      US
-## 6: 0000g8ehwj0j8r2eubt3s0sso 25uh68jjdw8z      Nundah  NULL      AU
-##              latlong
-## 1: 37.3689|-122.0353
-## 2:  37.5985|126.9783
-## 3:  30.4506|-91.1544
-## 4:  34.6937|135.5022
-## 5:  30.2669|-97.7428
-## 6: -27.4033|153.0603
-
-
-

Join Raw Assignee and Assignee Tables

-
merged_assignees <- merge(rawassignee, assignee, by.x = c('assignee_id', 'type', 'name_first', 'name_last', 'organization'), by.y = c('id','type', 'name_first', 'name_last', 'organization'))
-
colnames(merged_assignees)[colnames(merged_assignees)=="type"] <- "assignee_type"
-
-head(merged_assignees)
-
##                  assignee_id assignee_type name_first name_last
-## 1: 004bvflbsd8k0pjiga6l1hdm2             2       NULL      NULL
-## 2: 00aw1csuxll9yyqi67qq9m91q             2       NULL      NULL
-## 3: 00gkqk53hjao0mroltu3oo1qk             2       NULL      NULL
-## 4: 00grlqxhvjz0l7ylc5okvvtbg             2       NULL      NULL
-## 5: 00iquis3fn9yu6i0n8ojxex4c             2       NULL      NULL
-## 6: 00qivxg405hst268eg56psex6             2       NULL      NULL
-##                           organization                      uuid patent_id
-## 1: GM Global Technology Operations LLC pfqsbx9cud7900bsf30rpqhbn   8499738
-## 2: AT&T Intellectual Property II, L.P. 35u392dttglj2tk1lywtepfkq   7936730
-## 3:                        McAfee, Inc. q3at72duk25a02ujn35soumry   9148422
-## 4:                   Sequa Corporation zsrq73v0qr24hjmv3yms09xse   7246474
-## 5:            CENTER CROSS ARCHERY LLC go5lrbk3yph01byne9xojx4w3   9683819
-## 6:        Exceptional IP Holdings, LLC 3hbckl4pd9cyrqunipb21jobr   9296121
-##    rawlocation_id sequence
-## 1:           NULL        0
-## 2:           NULL        0
-## 3:           NULL        0
-## 4:           NULL        0
-## 5:           NULL        0
-## 6:           NULL        0
-
n_non_matches <- nrow(rawassignee) - nrow(merged_assignees)
-

The assignee table contains 486,381 rows while the rawassignee table contains 6,387,373 rows. The merging of these tables results in a table with 5,082,070 rows. Thus there are 1,305,303 rows in the rawassignee table which do not contain matches to the assignee table on the specified columns.

-
-
-

Join Raw Location and Location Tables

-
head(location)
-
##              id           city state country latitude longitude county
-## 1: 000ii62xlraz        Esparto    CA      US  38.6922 -122.0160   Yolo
-## 2: 000iprvfzu2f           Limm  NULL      GB  11.1000   30.5167   NULL
-## 3: 000ktsnnq96r Lauenburg/Elbe  NULL      DE  53.3763   10.5497   NULL
-## 4: 000osf8n2ysw     Bouxwiller  NULL      FR  47.5051    7.3453   NULL
-## 5: 0014a0zfh259        Morance  NULL      FR  45.8978    4.7004   NULL
-## 6: 0018uf8e2x0f    Dals Langed  NULL      SE  58.9167   12.3000   NULL
-##    state_fips county_fips
-## 1:          6        6113
-## 2:       NULL        NULL
-## 3:       NULL        NULL
-## 4:       NULL        NULL
-## 5:       NULL        NULL
-## 6:       NULL        NULL
-
colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id"
-head(rawlocation)
-
##               rawlocation_id  location_id        city state country
-## 1: 00008o6jw8bp4c824ihgc1fcz qxm7bum6wvip   Sunnyvale    CA      US
-## 2: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz       Seoul  NULL      KR
-## 3: 0000b606fnwnupiane3dfkoyl lmeg78jbn8nq Baton Rouge    LA      US
-## 4: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z       Osaka  NULL      JP
-## 5: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w      Austin    TX      US
-## 6: 0000g8ehwj0j8r2eubt3s0sso 25uh68jjdw8z      Nundah  NULL      AU
-##              latlong
-## 1: 37.3689|-122.0353
-## 2:  37.5985|126.9783
-## 3:  30.4506|-91.1544
-## 4:  34.6937|135.5022
-## 5:  30.2669|-97.7428
-## 6: -27.4033|153.0603
-
merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country'))
-
head(merged_locations)
-
##     location_id    city state country            rawlocation_id
-## 1: 000ii62xlraz Esparto    CA      US 09g0uz60wjm5ynib3xa28z8bn
-## 2: 000ii62xlraz Esparto    CA      US 0fyd31upf36smdme7oms1nhzi
-## 3: 000ii62xlraz Esparto    CA      US 2e4xhj4rjkwvu4gydwyiojzea
-## 4: 000ii62xlraz Esparto    CA      US 2j0ps8x68j7wsajxzxvsg6txx
-## 5: 000ii62xlraz Esparto    CA      US 49st0yptqxr9b2ecbp8l3eq97
-## 6: 000ii62xlraz Esparto    CA      US 4fgmrp96itwtsla9tglzqu0gb
-##              latlong latitude longitude county state_fips county_fips
-## 1: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 2: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 3: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 4: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 5: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 6: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-
n_non_matches <- nrow(rawlocation) - nrow(merged_locations)
-

The location table contains 142,188 rows while the rawlocation table contains 27,460,928 rows. Merging these tables results in a merged_locations table of 25,410,691 rows. Thus there are 2,050,237 rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table.

-
-
-

Join Merged_Locations and Merged_Assignees

-
merged_assignee_location <- merge(merged_locations, merged_assignees, by.x = 'rawlocation_id', by.y = 'rawlocation_id')
-
colnames(merged_assignee_location)[colnames(merged_assignee_location)=="type"] <- "assignee_type"
-head(merged_assignee_location)
-
##               rawlocation_id  location_id         city state country
-## 1: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz        Seoul  NULL      KR
-## 2: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z        Osaka  NULL      JP
-## 3: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w       Austin    TX      US
-## 4: 00012bpxm3zw9ic3fntkt2y0n 4kqu1ib0ozsk   Washington    DC      US
-## 5: 0001jr4pe4nf6judjbjf7ni7j 4bzg3k4qm96v Carol Stream    IL      US
-## 6: 0001pjj8tpsgixdh0w2vzkf6l 0yaxi23jdpoo         Bath  NULL      GB
-##             latlong latitude longitude               county state_fips
-## 1: 37.5985|126.9783  37.5985  126.9780                 NULL       NULL
-## 2: 34.6937|135.5022  34.6937  135.5020                 NULL       NULL
-## 3: 30.2669|-97.7428  30.2669  -97.7428               Travis         48
-## 4:  38.895|-77.0367  38.8950  -77.0367 District of Columbia         11
-## 5: 41.9125|-88.1347  41.9125  -88.1347               DuPage         17
-## 6:  51.3794|-2.3656  51.3794   -2.3656                 NULL       NULL
-##    county_fips              assignee_id assignee_type name_first name_last
-## 1:        NULL org_FRO2wdVwzz55rpoigYJU             3       NULL      NULL
-## 2:        NULL org_Y0pGhGHfz4S9myhFkrDo             3       NULL      NULL
-## 3:       48453 org_OAbJ9MgUqayvWClZtJkP             2       NULL      NULL
-## 4:       11001 org_EZcyn2HosNuH0Dg5xTai             6       NULL      NULL
-## 5:       17043 org_lcg2WfQGP885kmwEebaG             2       NULL      NULL
-## 6:        NULL org_1Aaa2HuQFtcHPNU453yR             3       NULL      NULL
-##                                                                organization
-## 1:                                                   Kia Motors Corporation
-## 2:                                                    Panasonic Corporation
-## 3:                                            Freescale Semiconductor, Inc.
-## 4: The United States of America as represented by the Secretary of Commerce
-## 5:                                                     Maremont Corporation
-## 6:                                     Mindspeed Technologies U.K., Limited
-##                         uuid patent_id sequence
-## 1: u7whuasvhjt0ogf11wpkgzjqe  10308144        1
-## 2: an1w3xckrtghahpevy3rnhonk   7535814        0
-## 3: qf877chvhg50wvjl46nfwxmeq   7795674        0
-## 4: vrcvz7aqi5jew5669gtz3vgbo   4672851        0
-## 5: 23wrusl8imqex9g1bjj79etja   4460073        0
-## 6: yf2x9phh517ejjfa98iudu97c   8352955        0
-
-
-

Join Assignee, Location, and Patent Data

-
head(patent)
-
##    patent_id patent_type   number patent_country       date
-## 1:  10000000     utility 10000000             US 2018-06-19
-## 2:  10000001     utility 10000001             US 2018-06-19
-## 3:  10000002     utility 10000002             US 2018-06-19
-## 4:  10000003     utility 10000003             US 2018-06-19
-## 5:  10000004     utility 10000004             US 2018-06-19
-## 6:  10000005     utility 10000005             US 2018-06-19
-##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2:                                                                                                                                                                                     The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3:                                                                                                      The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4:                                                                                                                                                                                       The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5:                                                                                                                                                                                                                                                                                                                                                                                                                                                              The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6:                                                                                                        A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-##                                                                                                                                                                                                     title
-## 1:                                                                                                                                                  Coherent LADAR using intra-pixel quadrature detection
-## 2:                                                                                                                                            Injection molding machine and mold thickness control method
-## 3:                                                                                                                                             Method for manufacturing polymer film and co-extruded film
-## 4:                                                                                                                                                  Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6:                                                                                                                                           Article vacuum formation method and vacuum forming apparatus
-##    kind num_claims      filename withdrawn
-## 1:   B2         20 ipg180619.xml      NULL
-## 2:   B2         12 ipg180619.xml      NULL
-## 3:   B2          9 ipg180619.xml      NULL
-## 4:   B2         18 ipg180619.xml      NULL
-## 5:   B2          6 ipg180619.xml      NULL
-## 6:   B2          4 ipg180619.xml      NULL
-
# free up some memory for final merge
-rm(rawassignee)
-rm(rawlocation)
-rm(assignee)
-rm(location)
-rm(merged_locations)
-
-complete_merge <- merge(patent, merged_assignee_location, by.x = 'patent_id', by.y = 'patent_id' )
-
head(complete_merge)
-
##    patent_id patent_type   number patent_country       date
-## 1:  10000000     utility 10000000             US 2018-06-19
-## 2:  10000004     utility 10000004             US 2018-06-19
-## 3:  10000007     utility 10000007             US 2018-06-19
-## 4:  10000008     utility 10000008             US 2018-06-19
-## 5:  10000010     utility 10000010             US 2018-06-19
-## 6:  10000011     utility 10000011             US 2018-06-19
-##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      abstract
-## 1:                                                                     A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 3:                                                                                                                                                                                                                                                                                              An expanding tool comprising: an actuator comprising a cylindrical housing that defines an actuator housing cavity; a primary ram disposed within the actuator housing cavity, the primary ram defining an internal primary ram cavity; a secondary ram disposed within the internal primary ram cavity; a cam roller carrier coupled to a distal end of the secondary ram; a drive collar positioned within a distal end of the actuator housing cavity; a roller clutch disposed within an internal cavity defined by an inner surface of the drive collar; a shuttle cam positioned between the roller clutch and a distal end of the primary ram; an expander cone coupled to the primary ram; and an expander head operably coupled to the drive collar.
-## 4:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                A decorated strip of coated, heat-shrinkable, plastic sheet material is placed in a spiral slot formed in a silicone rubber mold. The spiral slot is defined by a spiral wall having a uniform wall thickness. Upon heating in an oven, the material shrinks, forming a resiliently expansible arc-shaped band that can be worn as a bracelet or wristband.
-## 5: 3-D printing system include development stations positioned to electrostatically transfer build and support materials to an intermediate transfer surface, a transfer station adjacent the intermediate transfer surface, guides adjacent the transfer station, and platens moving on the guides. The guides are shaped to direct the platens to repeatedly pass the transfer station and come in contact with the intermediate transfer surface at the transfer station. The intermediate transfer surface transfers a layer of the build and support materials to the platens each time the platens contact the intermediate transfer surface at the transfer station to successively form layers of the build and support materials on the platens. The platens and the intermediate transfer surface include rack and pinion structures that temporarily join at the transfer station, as the platens pass the transfer station, to align the platens with the intermediate transfer surface as the platens contact the intermediate transfer surface.
-## 6:                                                                                                                                                                                                                                          To reduce distortion in an additively manufactured part, a shrinking platform is formed from a metal particulate filler in a debindable matrix. Shrinking supports of the same material are formed above the shrinking platform, and a desired part of the same material is formed upon them. A sliding release layer is provided below the shrinking platform of equal or larger surface area than a bottom of the shrinking platform to lateral resistance between the shrinking platform and an underlying surface. The matrix is debound sufficient to form a shape-retaining brown part assembly including the shrinking platform, shrinking supports, and the desired part. The shape-retaining brown part assembly is heated to shrink all of the components together at a same rate via atomic diffusion.
-##                                                                                                                                                                                                     title
-## 1:                                                                                                                                                  Coherent LADAR using intra-pixel quadrature detection
-## 2: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 3:                                                                                                                                                                                     PEX expanding tool
-## 4:                                                                                                                                                                        Bracelet mold and method of use
-## 5:                                                                                                                                    3-D electrostatic printer using rack and pinion registration system
-## 6:                                                                                                                                                   Supports for sintering additively manufactured parts
-##    kind num_claims      filename withdrawn            rawlocation_id
-## 1:   B2         20 ipg180619.xml      NULL r7ep1i9cnoxp5af0i8in6li7s
-## 2:   B2          6 ipg180619.xml      NULL 6rehm9medwxuoreqbg7ded05v
-## 3:   B2         24 ipg180619.xml      NULL pcmrojq59hh42w1zx55c4vtnq
-## 4:   B2         11 ipg180619.xml      NULL jf04zgy27bbhss9zf7du9t13x
-## 5:   B2         20 ipg180619.xml      NULL 2xq5n6vql58bj2imf6qntrxas
-## 6:   B1         30 ipg180619.xml      NULL 85q4hydnb4k5nsztw1ldr5s6o
-##     location_id       city state country           latlong latitude
-## 1: sz07iwmerndr    Waltham    MA      US  42.3764|-71.2361  42.3764
-## 2: 7xx3c5j38uy4  Monterrey  NULL      MX 25.6866|-100.3161  25.6866
-## 3: 3kfz8rbattyy Brookfield    WI      US  43.0606|-88.1064  43.0606
-## 4: tm5n0nkk6vui  Northvale    NJ      US  41.0064|-73.9494  41.0064
-## 5: 41x16fll6h13    Norwalk    CT      US  41.1175|-73.4083  41.1175
-## 6: 0gzn02sfdkgl  Watertown    MA      US  42.3708|-71.1833  42.3708
-##    longitude    county state_fips county_fips              assignee_id
-## 1:  -71.2361 Middlesex         25       25017 org_AARE0vwmqWJVw3VEQhqO
-## 2: -100.3160      NULL       NULL        NULL org_i7HMMn3SSFuW0jGC8dZr
-## 3:  -88.1064  Waukesha         55       55133 org_il88HucVp82DFlrSiqHF
-## 4:  -73.9494    Bergen         34       34003 org_dltfa8HjhhcG1eN1Af3J
-## 5:  -73.4083 Fairfield          9        9001 org_UrbE3xev7LUsnuvRjbep
-## 6:  -71.1833 Middlesex         25       25017 org_Hg5ThdGNeUqwAVSISglk
-##    assignee_type name_first name_last                        organization
-## 1:             2       NULL      NULL                    Raytheon Company
-## 2:             3       NULL      NULL           ZUBEX INDUSTRIAL SA DE CV
-## 3:             2       NULL      NULL Milwaukee Electric Tool Corporation
-## 4:             2       NULL      NULL                      Alex Toys, LLC
-## 5:             2       NULL      NULL                   Xerox Corporation
-## 6:             2       NULL      NULL                    MARKFORGED, INC.
-##                         uuid sequence
-## 1: vtus5eb4i7ebzleux3c8qfuir        0
-## 2: unykdm4rllult7dlswivtojfo        0
-## 3: de8maqgwv2aheidnovnd95njr        0
-## 4: 9jbbya3cdigrjtqxkjjl39s4y        0
-## 5: grm825lxh91ko3n1feva20zqr        0
-## 6: 1h5v4oycegaoh7dp95itxi6jg        0
-

The merged_assignee_location table contains 4,680,039 rows and the patent table contains 7,144,425 rows. The complete_merge table also contains 4,680,039 rows, the same number as merged_assignee_location, so every patent_id in the merged_assignee_location table has a match in the patent table.

-
- - - - -
- - - - - - - - diff --git a/01_bulk_download_example_joins/join_inventor.Rmd b/01_bulk_download_example_joins/join_inventor.Rmd deleted file mode 100644 index 4471a7d..0000000 --- a/01_bulk_download_example_joins/join_inventor.Rmd +++ /dev/null @@ -1,136 +0,0 @@ ---- -title: "Merge Inventor, Location, and Patent Tables" -output: html_document ---- - -#### Note: This HTML document was last generated on `r Sys.Date()` - - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -knitr::opts_knit$set(root.dir = "TODO: Add Directory Path Here") - -library(data.table) -library(scales) -``` - -## Load Patent Table -```{r, warning=FALSE} -patent <- fread("patent.tsv") -colnames(patent)[colnames(patent)=="type"] <- "patent_type" -colnames(patent)[colnames(patent)=="country"] <- "patent_country" -colnames(patent)[colnames(patent)=="id"] <- "patent_id" -head(patent) - -``` - -## Load Inventor and Location Tables -```{r, warning=FALSE} - -inventor <- fread("inventor.tsv") -location <- fread("location.tsv") - -``` - -```{r} -head(inventor) -head(location) - -``` - -## Load Raw Inventor and Raw Location Tables -```{r, warning=FALSE} -rawinventor <- fread("rawinventor.tsv") -rawlocation <- fread("rawlocation.tsv") - -``` - -```{r} -head(rawinventor) -colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id" -head(rawlocation) - -``` - -## Join Raw Location and Location Tables -```{r} -merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country')) -``` - -```{r} -head(merged_locations) - -n_non_matches <- nrow(rawlocation) - nrow(merged_locations) -``` - - -The location table contains `r comma_format()(nrow(location))` rows while the rawlocation table contains `r comma_format()(nrow(rawlocation))` rows. Merging these tables results in a merged_locations table of `r comma_format()(nrow(merged_locations))` rows. 
Thus there are `r comma_format()(n_non_matches)` rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table. - -## Join Raw Inventor and Inventor Tables -```{r} -# free up some memory -rm(location) -rm(rawlocation) - -``` - -```{r} -head(inventor) -``` - -```{r} -head(rawinventor) -``` - -```{r} -merged_inventors <- merge(rawinventor, inventor, by.x = c('inventor_id', 'name_first', 'name_last'), by.y = c('id', 'name_first', 'name_last')) -``` - -```{r} - -head(merged_inventors) -n_non_matches <- nrow(rawinventor) - nrow(merged_inventors) -``` - -The inventor table contains `r comma_format()(nrow(inventor))` rows while the rawinventor table contains `r comma_format()(nrow(rawinventor))` rows. The resulting merged_inventors table contains `r comma_format()(nrow(merged_inventors))` rows. Thus there are `r comma_format()(n_non_matches)` rows in the rawinventor table which do not contain matching inventor_ids, first names, and last names in the inventor table. - -## Join Merged_Inventors and Merged_Locations -```{r} -# free up some memory -rm(inventor) -rm(rawinventor) -``` - -```{r} -merged_inventor_location <- merge(merged_inventors, merged_locations, by.x = 'rawlocation_id', by.y = 'rawlocation_id') -``` - - -```{r} -head(merged_inventor_location) -n_non_matches <- nrow(merged_locations) - nrow(merged_inventor_location) -``` - -The merged_locations table contains `r comma_format()(nrow(merged_locations))` rows, while the merged_inventors table contains `r comma_format()(nrow(merged_inventors))` rows. The resulting merge of these two tables contains `r comma_format()(nrow(merged_inventor_location))` rows. Thus, there are `r comma_format()(n_non_matches)` rows in the merged_locations table that are not matched by rawlocation_id in the merged_inventors table. 
- -```{r} -rm(merged_inventors) -rm(merged_locations) - -``` - - -```{r} -head(patent) -``` - -## Join Inventor, Location, and Patent Data -```{r} -complete_merge <- merge(patent, merged_inventor_location, by.x = c('patent_id'), by.y = c('patent_id')) -``` - -```{r} -head(complete_merge) -``` - -The merged_inventor_location table contains `r comma_format()(nrow(merged_inventor_location))` rows and the patent table contains `r comma_format()(nrow(patent))` rows. The complete_merge contains `r comma_format()(nrow(complete_merge))` rows. Thus, all of the patent_ids in the merged_inventor_location table have matches in the patent table. \ No newline at end of file diff --git a/01_bulk_download_example_joins/join_inventor.html b/01_bulk_download_example_joins/join_inventor.html deleted file mode 100644 index c583022..0000000 --- a/01_bulk_download_example_joins/join_inventor.html +++ /dev/null @@ -1,530 +0,0 @@ - - - - - - - - - - - - - -Merge Inventor, Location, and Patent Tables - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - -
-

Note: This HTML document was last generated on 2020-01-14

-
-
-

Load Patent Table

-
patent <- fread("patent.tsv")
-colnames(patent)[colnames(patent)=="type"] <- "patent_type"
-colnames(patent)[colnames(patent)=="country"] <- "patent_country"
-colnames(patent)[colnames(patent)=="id"] <- "patent_id"
-head(patent)
-
##    patent_id patent_type   number patent_country       date
-## 1:  10000000     utility 10000000             US 2018-06-19
-## 2:  10000001     utility 10000001             US 2018-06-19
-## 3:  10000002     utility 10000002             US 2018-06-19
-## 4:  10000003     utility 10000003             US 2018-06-19
-## 5:  10000004     utility 10000004             US 2018-06-19
-## 6:  10000005     utility 10000005             US 2018-06-19
-##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2:                                                                                                                                                                                     The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3:                                                                                                      The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4:                                                                                                                                                                                       The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5:                                                                                                                                                                                                                                                                                                                                                                                                                                                              The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6:                                                                                                        A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-##                                                                                                                                                                                                     title
-## 1:                                                                                                                                                  Coherent LADAR using intra-pixel quadrature detection
-## 2:                                                                                                                                            Injection molding machine and mold thickness control method
-## 3:                                                                                                                                             Method for manufacturing polymer film and co-extruded film
-## 4:                                                                                                                                                  Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6:                                                                                                                                           Article vacuum formation method and vacuum forming apparatus
-##    kind num_claims      filename withdrawn
-## 1:   B2         20 ipg180619.xml      NULL
-## 2:   B2         12 ipg180619.xml      NULL
-## 3:   B2          9 ipg180619.xml      NULL
-## 4:   B2         18 ipg180619.xml      NULL
-## 5:   B2          6 ipg180619.xml      NULL
-## 6:   B2          4 ipg180619.xml      NULL
-
-
-

Load Inventor and Location Tables

-
inventor <- fread("inventor.tsv")
-location <- fread("location.tsv")
-
head(inventor)
-
##            id   name_first        name_last
-## 1: 10000001-2    Hyeon-Jae               Yu
-## 2: 10000004-1 Miguel Jorge Zubiria Elizondo
-## 3: 10000004-2    Jose Juan    Valadez Lopez
-## 4: 10000005-2    Katsunori              Oda
-## 5: 10000006-1         Marc           Saelen
-## 6: 10000014-2          Tim        Diehlmann
-
head(location)
-
##              id           city state country latitude longitude county
-## 1: 000ii62xlraz        Esparto    CA      US  38.6922 -122.0160   Yolo
-## 2: 000iprvfzu2f           Limm  NULL      GB  11.1000   30.5167   NULL
-## 3: 000ktsnnq96r Lauenburg/Elbe  NULL      DE  53.3763   10.5497   NULL
-## 4: 000osf8n2ysw     Bouxwiller  NULL      FR  47.5051    7.3453   NULL
-## 5: 0014a0zfh259        Morance  NULL      FR  45.8978    4.7004   NULL
-## 6: 0018uf8e2x0f    Dals Langed  NULL      SE  58.9167   12.3000   NULL
-##    state_fips county_fips
-## 1:          6        6113
-## 2:       NULL        NULL
-## 3:       NULL        NULL
-## 4:       NULL        NULL
-## 5:       NULL        NULL
-## 6:       NULL        NULL
-
-
-

Load Raw Inventor and Raw Location Tables

-
rawinventor <- fread("rawinventor.tsv")
-rawlocation <- fread("rawlocation.tsv")
-
head(rawinventor)
-
##                         uuid patent_id inventor_id
-## 1: 0000hccb98m2kc6g1v7128k5w   7646155   4341225-2
-## 2: 0000kwt5abwdu9f4av6zoa61t   4339721   4339721-2
-## 3: 0000n6xqianutadbzbgzwled7   6610738   6610738-6
-## 4: 0000n8nqsxhrztn7djlxou00k   6448562   6448562-2
-## 5: 0000p6jf5l8yzv04wimaoabab   4432679   4127345-1
-## 6: 0000tgs7dqvzrnfiad3b9fr06   8977851  6424872-10
-##               rawlocation_id name_first   name_last sequence rule_47
-## 1: omi6wqlrblholsssk9qx0dz5b   Samuel G       Woods        0        
-## 2: 8uifwkdu885g2jxfu2uhnzgp1     Takuya      Hosoda        1        
-## 3: hxh83mos96occibi6wg9fzfuy     Eva K.       Mudrn        5        
-## 4: cbejrj9ius2mty4ig27zqvj99     Muamer       Zukic        1        
-## 5: k7k03qpqzsoobzg1rgsr83q2e  Donald J. Angelosanto        0        
-## 6: ixb2b02fc6yq2q7fuj6hnycie  Lee Allen     Neitzel        0        
-##    deceased
-## 1:     NULL
-## 2:     NULL
-## 3:     NULL
-## 4:     NULL
-## 5:     NULL
-## 6:     NULL
-
colnames(rawlocation)[colnames(rawlocation)=="id"] <- "rawlocation_id"
-head(rawlocation)
-
##               rawlocation_id  location_id        city state country
-## 1: 00008o6jw8bp4c824ihgc1fcz qxm7bum6wvip   Sunnyvale    CA      US
-## 2: 0000a07fetfy6e2t4fjqc4nj6 cemoactpeldz       Seoul  NULL      KR
-## 3: 0000b606fnwnupiane3dfkoyl lmeg78jbn8nq Baton Rouge    LA      US
-## 4: 0000bztanu9rrtm943i8a7wry 1rfm0quusx8z       Osaka  NULL      JP
-## 5: 0000dqyk4jrapg90sz5a4eog6 kxmtp5terq5w      Austin    TX      US
-## 6: 0000g8ehwj0j8r2eubt3s0sso 25uh68jjdw8z      Nundah  NULL      AU
-##              latlong
-## 1: 37.3689|-122.0353
-## 2:  37.5985|126.9783
-## 3:  30.4506|-91.1544
-## 4:  34.6937|135.5022
-## 5:  30.2669|-97.7428
-## 6: -27.4033|153.0603
-
-
-

Join Raw Location and Location Tables

-
merged_locations <- merge(rawlocation, location, by.x = c('location_id', 'city', 'state', 'country'), by.y = c('id','city', 'state', 'country'))
-
head(merged_locations)
-
##     location_id    city state country            rawlocation_id
-## 1: 000ii62xlraz Esparto    CA      US 09g0uz60wjm5ynib3xa28z8bn
-## 2: 000ii62xlraz Esparto    CA      US 0fyd31upf36smdme7oms1nhzi
-## 3: 000ii62xlraz Esparto    CA      US 2e4xhj4rjkwvu4gydwyiojzea
-## 4: 000ii62xlraz Esparto    CA      US 2j0ps8x68j7wsajxzxvsg6txx
-## 5: 000ii62xlraz Esparto    CA      US 49st0yptqxr9b2ecbp8l3eq97
-## 6: 000ii62xlraz Esparto    CA      US 4fgmrp96itwtsla9tglzqu0gb
-##              latlong latitude longitude county state_fips county_fips
-## 1: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 2: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 3: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 4: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 5: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-## 6: 38.6922|-122.0161  38.6922  -122.016   Yolo          6        6113
-
n_non_matches <- nrow(rawlocation) - nrow(merged_locations)
-

The location table contains 142,188 rows while the rawlocation table contains 27,460,928 rows. Merging these tables results in a merged_locations table of 25,410,691 rows. Thus there are 2,050,237 rows in the rawlocation table which do not have a matching location_id, city, state, and country in the location table.

-
-
-

Join Raw Inventor and Inventor Tables

-
# free up some memory 
-rm(location)
-rm(rawlocation)
-
head(inventor)
-
##            id   name_first        name_last
-## 1: 10000001-2    Hyeon-Jae               Yu
-## 2: 10000004-1 Miguel Jorge Zubiria Elizondo
-## 3: 10000004-2    Jose Juan    Valadez Lopez
-## 4: 10000005-2    Katsunori              Oda
-## 5: 10000006-1         Marc           Saelen
-## 6: 10000014-2          Tim        Diehlmann
-
head(rawinventor)
-
##                         uuid patent_id inventor_id
-## 1: 0000hccb98m2kc6g1v7128k5w   7646155   4341225-2
-## 2: 0000kwt5abwdu9f4av6zoa61t   4339721   4339721-2
-## 3: 0000n6xqianutadbzbgzwled7   6610738   6610738-6
-## 4: 0000n8nqsxhrztn7djlxou00k   6448562   6448562-2
-## 5: 0000p6jf5l8yzv04wimaoabab   4432679   4127345-1
-## 6: 0000tgs7dqvzrnfiad3b9fr06   8977851  6424872-10
-##               rawlocation_id name_first   name_last sequence rule_47
-## 1: omi6wqlrblholsssk9qx0dz5b   Samuel G       Woods        0        
-## 2: 8uifwkdu885g2jxfu2uhnzgp1     Takuya      Hosoda        1        
-## 3: hxh83mos96occibi6wg9fzfuy     Eva K.       Mudrn        5        
-## 4: cbejrj9ius2mty4ig27zqvj99     Muamer       Zukic        1        
-## 5: k7k03qpqzsoobzg1rgsr83q2e  Donald J. Angelosanto        0        
-## 6: ixb2b02fc6yq2q7fuj6hnycie  Lee Allen     Neitzel        0        
-##    deceased
-## 1:     NULL
-## 2:     NULL
-## 3:     NULL
-## 4:     NULL
-## 5:     NULL
-## 6:     NULL
-
merged_inventors <- merge(rawinventor, inventor, by.x = c('inventor_id', 'name_first', 'name_last'), by.y = c('id', 'name_first', 'name_last'))
-
head(merged_inventors)
-
##    inventor_id   name_first        name_last                      uuid
-## 1:  10000001-2    Hyeon-Jae               Yu 5b2w88r3jwbmj2w5xf2u4ty80
-## 2:  10000004-1 Miguel Jorge Zubiria Elizondo luurs3qcxggn1rw211n9kt7gf
-## 3:  10000004-2    Jose Juan    Valadez Lopez ycf0b8c5ezlrx4pzhl111ftyt
-## 4:  10000005-2    Katsunori              Oda 4lw3e1g2uwntqcy2xgblb1cce
-## 5:  10000006-1         Marc           Saelen rwxm1mlf89apoxr91jsaqfk8m
-## 6:  10000006-1         Marc           Saelen xu020uor4gzorxepqeewgbbzn
-##    patent_id            rawlocation_id sequence rule_47 deceased
-## 1:  10000001 zh7bez8b146hfu0buyxaa0v9b        1             NULL
-## 2:  10000004 v8ucm2trnfns3id8krhmw8bty        0             NULL
-## 3:  10000004 mfezp7a3nfcq9r0pcu8qi286z        1             NULL
-## 4:  10000005 wylqo83g0a7zms7x227bjt9vx        1             NULL
-## 5:  10000006 rombrj5kywqys2cmzxk7p2rvc        0             NULL
-## 6:  10343329 zavbf7rou3y1q39qr8bl5rrpy        0             NULL
-
n_non_matches <- nrow(rawinventor) - nrow(merged_inventors)
-

The inventor table contains 3,857,228 rows while the rawinventor table contains 17,165,604 rows. The resulting merged_inventors table contains 14,697,445 rows. Thus there are 2,468,159 rows in the rawinventor table which do not have a matching inventor_id, first name, and last name in the inventor table.

-
-
-

Join Merged_Inventors and Merged_Locations

-
# free up some memory
-rm(inventor)
-rm(rawinventor)
-
merged_inventor_location <- merge(merged_inventors, merged_locations, by.x = 'rawlocation_id', by.y = 'rawlocation_id')
-
head(merged_inventor_location)
-
##               rawlocation_id inventor_id   name_first     name_last
-## 1: 00008o6jw8bp4c824ihgc1fcz   6362662-2      Guoxing            Li
-## 2: 0000b606fnwnupiane3dfkoyl   6080888-2 Kannappan C. Chockalingham
-## 3: 0000iaytxly6w1a2q42efuk5p   4711077-7        Takeo       Hattori
-## 4: 0000mzv8qbiyubouljdzswns2   D693064-1  Nicholas H.        Dupree
-## 5: 0000nl2head6ga63f9iwoa4w1   3946173-1     Terry M.         Haber
-## 6: 0000u1ntofpo3bnr6mv36z9gn   4105471-2     James E.         Avery
-##                         uuid patent_id sequence rule_47 deceased
-## 1: yffjycuutcocumzt2msx2cwb7   8164309        0             NULL
-## 2: s32168nc17evwmldr6z9y6d4z   6080888        1             NULL
-## 3: 2m77qi6vvzfcp4ut11e64ohvs   D660122        1             NULL
-## 4: 5uhw2w0slnoc4qiba14r4wnjn   D850010        1             NULL
-## 5: 2x85x9q4ejdzifytyd1c9hnx3   5188615        0             NULL
-## 6: zc0j4hdr4l7l6lq0fkv8dq2yz   5942047        4             NULL
-##     location_id        city state country           latlong latitude
-## 1: qxm7bum6wvip   Sunnyvale    CA      US 37.3689|-122.0353  37.3689
-## 2: lmeg78jbn8nq Baton Rouge    LA      US  30.4506|-91.1544  30.4506
-## 3: ongtod0a98g7       Tokyo  NULL      JP   35.685|139.7514  35.6850
-## 4: x9z0kf8mxhvf  Providence    RI      US  41.8239|-71.4133  41.8239
-## 5: 9rublg2e2su2 Lake Forest    CA      US 33.6469|-117.6883  33.6469
-## 6: 2opf1msvy0zz    Issaquah    WA      US 47.5303|-122.0314  47.5303
-##    longitude           county state_fips county_fips
-## 1: -122.0350      Santa Clara          6        6085
-## 2:  -91.1544 East Baton Rouge         22       22033
-## 3:  139.7510             NULL       NULL        NULL
-## 4:  -71.4133       Providence         44       44007
-## 5: -117.6880           Orange          6        6059
-## 6: -122.0310             King         53       53033
-
n_non_matches <- nrow(merged_locations) - nrow(merged_inventor_location)
-

The merged_locations table contains 25,410,691 rows, while the merged_inventors table contains 14,697,445 rows. The resulting merge of these two tables contains 13,625,373 rows. Thus, there are 11,785,318 rows in the merged_locations table that are not matched by rawlocation_id in the merged_inventors table.

-
rm(merged_inventors)
-rm(merged_locations)
-
head(patent)
-
##    patent_id patent_type   number patent_country       date
-## 1:  10000000     utility 10000000             US 2018-06-19
-## 2:  10000001     utility 10000001             US 2018-06-19
-## 3:  10000002     utility 10000002             US 2018-06-19
-## 4:  10000003     utility 10000003             US 2018-06-19
-## 5:  10000004     utility 10000004             US 2018-06-19
-## 6:  10000005     utility 10000005             US 2018-06-19
-##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  abstract
-## 1: A frequency modulated (coherent) laser detection and ranging system includes a read-out integrated circuit formed with a two-dimensional array of detector elements each including a photosensitive region receiving both return light reflected from a target and light from a local oscillator, and local processing circuitry sampling the output of the photosensitive region four times during each sample period clock cycle to obtain quadrature components. A data bus coupled to one or more outputs of each of the detector elements receives the quadrature components from each of the detector elements for each sample period and serializes the received quadrature components. A processor coupled to the data bus receives the serialized quadrature components and determines an amplitude and a phase for at least one interfering frequency corresponding to interference between the return light and the local oscillator light using the quadrature components.
-## 2:                                                                                                                                                                                     The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3:                                                                                                      The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4:                                                                                                                                                                                       The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5:                                                                                                                                                                                                                                                                                                                                                                                                                                                              The present invention relates to provides a double-oriented film, co-extrude, and of low thickness, with a layered composition that gives the property of being of high barrier to gases and manufactured by the process of co-extrusion of 3 bubbles, which gives the property of when being thermoformed, ensure the distribution of uniform thickness in the walls, base, folds, and corners of the formed tray saving a minimum of 50% of plastic without diminishing its gas barrier and its resistance to puncture.
-## 6:                                                                                                        A vacuum forming apparatus is provided that forms an article having a covering bonded to the surface of a substrate in a molding space using a first mold and a second mold. The vacuum forming apparatus is provided with clamps for grasping the covering between the first and second molds arranged at the open positions. The clamps are movable between an interfering position, at which the clamps are located in the movement ranges of the first and second molds, and standby positions, at which the clamps are outside the movement ranges. After the covering is heated, the clamps grasping the covering move to the standby positions and stretch the covering. The first and second molds move to the closed positions and the article is molded between the first and second molds so that the stretched covering and the substrate are bonded to each other.
-##                                                                                                                                                                                                     title
-## 1:                                                                                                                                                  Coherent LADAR using intra-pixel quadrature detection
-## 2:                                                                                                                                            Injection molding machine and mold thickness control method
-## 3:                                                                                                                                             Method for manufacturing polymer film and co-extruded film
-## 4:                                                                                                                                                  Method for producing a container from a thermoplastic
-## 5: Process of obtaining a double-oriented film, co-extruded, and of low thickness made by a three bubble process that at the time of being thermoformed provides a uniform thickness in the produced tray
-## 6:                                                                                                                                           Article vacuum formation method and vacuum forming apparatus
-##    kind num_claims      filename withdrawn
-## 1:   B2         20 ipg180619.xml      NULL
-## 2:   B2         12 ipg180619.xml      NULL
-## 3:   B2          9 ipg180619.xml      NULL
-## 4:   B2         18 ipg180619.xml      NULL
-## 5:   B2          6 ipg180619.xml      NULL
-## 6:   B2          4 ipg180619.xml      NULL
-
-
-

Join Inventor, Location, and Patent Data

-
complete_merge <- merge(patent, merged_inventor_location, by.x = c('patent_id'), by.y = c('patent_id'))
-
head(complete_merge)
-
##    patent_id patent_type   number patent_country       date
-## 1:  10000001     utility 10000001             US 2018-06-19
-## 2:  10000001     utility 10000001             US 2018-06-19
-## 3:  10000002     utility 10000002             US 2018-06-19
-## 4:  10000003     utility 10000003             US 2018-06-19
-## 5:  10000003     utility 10000003             US 2018-06-19
-## 6:  10000003     utility 10000003             US 2018-06-19
-##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             abstract
-## 1:                                                                                The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 2:                                                                                The injection molding machine includes a fixed platen, a moveable platen moving forward and backward by a toggle link, a base plate supporting the toggle link, a driving part for mold clamping to operate the toggle link, a driving part for mold thickness adjustment to adjust a mold thickness, and a control unit to calculate a movement distance gap before a clamping process by controlling the driving part for mold thickness adjustment to move the base plate backward and then move the base plate forward to a target movement position based on a fold amount of the toggle link, and control the driving part for mold thickness adjustment using a value obtained by deducting the movement distance gap from the fold amount of the toggle link when producing a clamp force.
-## 3: The present invention relates to: a method for manufacturing a polymer film, the method including a base film forming step for co-extruding a first resin containing a polyamide-based resin and a second resin containing a copolymer including polyamide-based segments and polyether-based segments; a co-extruded film including a base film including a first resin layer containing a polyamide-based resin, and a second resin layer containing a copolymer having polyamide-based segments and polyether-based segments; to a co-extruded film including a base film including a first resin layer and a second resin layer, which have different melting points; and to a method for manufacturing a polymer film, the method including a base film forming step including a step of co-extruding a first resin and a second resin, which have different melting points.
-## 4:                                                                                  The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 5:                                                                                  The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-## 6:                                                                                  The invention relates to a method for producing a container (2) from a thermoplastic, having at least one surround (4), provided in the container wall (1), for a container opening. The surround (4) comprises a structure behind which parts of the container wall (1) extend and/or which is penetrated by said parts. The method is carried out using a multi-part blow mold that has at least two mold parts, each having at least one cavity, wherein the surround is placed as an insert in the cavity (10) of the blow mold (7). The method comprises pressing the preform that has been forced into the cavity (10) into the structure of the surround (4) by means of a tool which is brought to bear on the preform (12) on the side of the preform facing away from the cavity (10).
-##                                                          title kind
-## 1: Injection molding machine and mold thickness control method   B2
-## 2: Injection molding machine and mold thickness control method   B2
-## 3:  Method for manufacturing polymer film and co-extruded film   B2
-## 4:       Method for producing a container from a thermoplastic   B2
-## 5:       Method for producing a container from a thermoplastic   B2
-## 6:       Method for producing a container from a thermoplastic   B2
-##    num_claims      filename withdrawn            rawlocation_id
-## 1:         12 ipg180619.xml      NULL 95igh4mbo217kt9dmb2x8r3il
-## 2:         12 ipg180619.xml      NULL zh7bez8b146hfu0buyxaa0v9b
-## 3:          9 ipg180619.xml      NULL wq19fdjfax9nrdzk88v66n3hy
-## 4:         18 ipg180619.xml      NULL bg0r7733kk1djxew6d0jsfcr8
-## 5:         18 ipg180619.xml      NULL c9av9x4gxgkr3bvhc2dtrhsvz
-## 6:         18 ipg180619.xml      NULL d0bw7mgqb97j92ibz1xjxmfr7
-##    inventor_id name_first name_last                      uuid sequence
-## 1:   7354823-3    Sun-Woo       Lee enlwyeih9u2bjg5io9wvedcdq        0
-## 2:  10000001-2  Hyeon-Jae        Yu 5b2w88r3jwbmj2w5xf2u4ty80        1
-## 3:   9833943-2 Dong-Hyeon      Choi m5jfzgdhewd0gk2owlju7je84        2
-## 4:   8603280-2    Carsten  Elsasser ilemj1o687jzj3wah1if9ubfw        1
-## 5:   8721828-1      Guido  Bergmann 6i8q3apa83yhwrxxg04s1988v        0
-## 6:   9776357-4   Cristoph    Mehren 81eavybjwpqpb4vcaieg78l8e        2
-##    rule_47 deceased  location_id         city state country
-## 1:             NULL ze3x8q3hwgh8     Gunpo-si  NULL      KR
-## 2:             NULL 0e1go7wy364j     Ansan-si  NULL      KR
-## 3:             NULL gilg4hcy289v    Yongin-si  NULL      KR
-## 4:             NULL fdqcb3ukf6sw      Pulheim  NULL      DE
-## 5:             NULL qgidi0xps9o6 St. Augustin  NULL      DE
-## 6:             NULL 9bxbajqqn4wg Konigswinter  NULL      DE
-##             latlong latitude longitude county state_fips county_fips
-## 1: 37.3617|126.9352  37.3617  126.9350   NULL       NULL        NULL
-## 2: 37.3219|126.8309  37.3219  126.8310   NULL       NULL        NULL
-## 3: 37.2411|127.1776  37.2411  127.1780   NULL       NULL        NULL
-## 4:         51.0|6.8  51.0000    6.8000   NULL       NULL        NULL
-## 5:    50.7554|7.182  50.7554    7.1820   NULL       NULL        NULL
-## 6:   50.6833|7.1833  50.6833    7.1833   NULL       NULL        NULL
-

The merged_inventor_location table contains 13,625,373 rows and the patent table contains 7,144,425 rows. The complete_merge contains 13,625,373 rows. Thus, all of the patent_ids in the merged_inventor_location table have matches in the patent table.

-
- - - - -
- - - - - - - - diff --git a/02_claims_examples/Claims Example.ipynb b/02_claims_examples/Claims Example.ipynb deleted file mode 100644 index 1ace450..0000000 --- a/02_claims_examples/Claims Example.ipynb +++ /dev/null @@ -1,207 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Claims Parsing\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:39:04.693196Z", - "start_time": "2020-05-10T20:39:04.681724Z" - } - }, - "outputs": [], - "source": [ - "import csv\n", - "pandas_chunksize = 100000\n", - "delimiter =\"\\t\"\n", - "filepath = \"claims_2005.tsv\"\n", - "quote_type =csv.QUOTE_NONNUMERIC" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using pandas" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:39:06.355920Z", - "start_time": "2020-05-10T20:39:05.866932Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:39:06.497168Z", - "start_time": "2020-05-10T20:39:06.490860Z" - } - }, - "outputs": [], - "source": [ - "total_rows = 0 " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:39:31.874577Z", - "start_time": "2020-05-10T20:39:07.897470Z" - }, - "scrolled": true - }, - "outputs": [], - "source": [ - "claims_chunks = pd.read_csv(\n", - " filepath,\n", - " sep=delimiter,\n", - " chunksize=pandas_chunksize,\n", - " quoting=quote_type,\n", - " quotechar='\"')\n", - "\n", - "for claim_chunk in claims_chunks:\n", - " total_rows += claim_chunk.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:39:31.896463Z", - "start_time": "2020-05-10T20:39:31.880724Z" - } - 
}, - "outputs": [ - { - "data": { - "text/plain": [ - "3628513" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "total_rows" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using CSV reader" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:39:31.908592Z", - "start_time": "2020-05-10T20:39:31.901140Z" - } - }, - "outputs": [], - "source": [ - "import csv" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:41:12.864381Z", - "start_time": "2020-05-10T20:41:12.857797Z" - } - }, - "outputs": [], - "source": [ - "total_rows=0" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:41:33.025448Z", - "start_time": "2020-05-10T20:41:13.005490Z" - } - }, - "outputs": [], - "source": [ - "with open(filepath) as fp:\n", - " reader = csv.reader(fp, delimiter=delimiter,quotechar='\"', quoting=quote_type)\n", - " for csv_row in reader:\n", - " total_rows+=1" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2020-05-10T20:41:33.034002Z", - "start_time": "2020-05-10T20:41:33.027644Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3628514\n" - ] - } - ], - "source": [ - "print(total_rows)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/02_claims_examples/README.md 
b/02_claims_examples/README.md deleted file mode 100644 index 3a67301..0000000 --- a/02_claims_examples/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Claims Parsing Settings - -| Setting | Value | -|-----------------|----------------------------------------------------------------------------------------------------------------------------------------| -| Delimiter | Tab (\t) | -| Quote Character | Double Quote (") | -| Quote Setting | Quote Non Numeric | -| Python Example | [Example Code on GitHub](https://github.com/CSSIP-AIR/PatentsView-Code-Snippets/blob/master/02_claims_examples/Claims%20Example.ipynb) | diff --git a/03_bulk_download_read_in/Python Scripts/README.md b/03_bulk_download_read_in/Python Scripts/README.md deleted file mode 100644 index 0bdda6e..0000000 --- a/03_bulk_download_read_in/Python Scripts/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# PatentsView-Code-Snippets - -# Bulk Download Files: Python Read-in Scripts - -Below is a list of all bulk download files and information on whether or not there is a template read-in script currently available. - -If the script for a file is not currently available, other scripts in this repository can be used as reference. The files are all structured in the same manner so you should be able to use a template from a different file to help determine how to proceed. 
- -List of Resources: - -All Python Scripts were created using Python 3.7.4 - -Necessary Packages: - -- zipfile -- pandas -- os - -| Bulk Download File | Status of Script | -| --- |--- | -| application | *Available* | -| assignee | *Available* | -| botanic | *Available* | -| brf_sum_text | *In Progress* | -| claim | *Available* | -| cpc_current | *Available* | -| cpc_group | *Available* | -| cpc_subgroup | *Available* | -| cpc_subsection | *Available* | -| draw_desc_text | *In Progress* | -| detail_desc_text | *In Progress* | -| figures | *Available* | -| foreign_priority | *Available* | -| foreigncitation | *Available* | -| government_interest | *Available* | -| government_organization | *Available* | -| inventor | *Available* | -| inventor_gender | *Available* | -| ipcr | *Available* | -| lawyer | *Available* | -| location | *Available* | -| location_assignee | *Available* | -| location_inventor | *In Progress* | -| mainclass | *Available* | -| mainclass_current | *Available* | -| nber | *Available* | -| nber_category | *Available* | -| nber_subcategory | *Available* | -| non_inventor_applicant | *Available* | -| otherreference | *Available* | -| patent | *Available* | -| patent_assignee | *In Progress* | -| patent_contractawardnumber | *Available* | -| patent_govintorg | *Available* | -| patent_inventor | *Available* | -| patent_lawyer | *Available* | -| pct_data | *Available* | -| persistent_assignee_disambig | *Available* | -| persistent_inventor_disambig | *Available* | -| rawassignee | *Available* | -| rawexaminer | *Available* | -| rawinventor | *Available* | -| rawlawyer | *Available* | -| rawlocation | *Available* | -| rel_app_text | *Available* | -| subclass | *Available* | -| subclass_current | *Available* | -| us_term_of_grant | *Available* | -| usapplicationcitation | *Available* | -| uspatentcitation | *Available* | -| uspc | *Available* | -| uspc_current | *Available* | -| usreldoc | *Available* | -| wipo | *Available* | -| wipo_field | *Available* 
| \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/application.py b/03_bulk_download_read_in/Python Scripts/archive/application.py deleted file mode 100644 index 5a57ff4..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/application.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "application.tsv.zip" -f_name = "application.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/assignee.py b/03_bulk_download_read_in/Python Scripts/archive/assignee.py deleted file mode 100644 index 09fbe77..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/assignee.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "assignee.tsv.zip" -f_name = "assignee.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/botanic.py b/03_bulk_download_read_in/Python Scripts/archive/botanic.py deleted file mode 100644 index 77b202a..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/botanic.py +++ /dev/null @@ -1,32 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "botanic.tsv.zip" -f_name = "botanic.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -dtype={'sequence': int} -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1976.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1976.py deleted file mode 100644 index 1333bd7..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1976.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1976 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1976.tsv.zip" -f_name = "claims_1976.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1977.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1977.py deleted file mode 100644 index 489e7d0..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1977.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for 1977 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1977.tsv.zip" -f_name = "claims_1977.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1978.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1978.py deleted file mode 100644 index a7fe6b0..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1978.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1978 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1978.tsv.zip" -f_name = "claims_1978.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1979.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1979.py deleted file mode 100644 index e39a4cd..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1979.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1979 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1979.tsv.zip" -f_name = "claims_1979.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1980.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1980.py deleted file mode 100644 index 8cdbef2..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1980.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for 1980 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1980.tsv.zip" -f_name = "claims_1980.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1981.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1981.py deleted file mode 100644 index c2e279c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1981.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1981 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1981.tsv.zip" -f_name = "claims_1981.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1982.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1982.py deleted file mode 100644 index 91e2645..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1982.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1982 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1982.tsv.zip" -f_name = "claims_1982.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1983.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1983.py deleted file mode 100644 index bd5959b..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1983.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1983 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1983.tsv.zip" -f_name = "claims_1983.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1984.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1984.py deleted file mode 100644 index b9352f0..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1984.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1984 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1984.tsv.zip" -f_name = "claims_1984.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1985.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1985.py deleted file mode 100644 index 3336118..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1985.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1985 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1985.tsv.zip" -f_name = "claims_1985.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1986.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1986.py deleted file mode 100644 index 8aac0e5..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1986.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for 1986 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1986.tsv.zip" -f_name = "claims_1986.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1987.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1987.py deleted file mode 100644 index e462adc..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1987.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1987 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1987.tsv.zip" -f_name = "claims_1987.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1988.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1988.py deleted file mode 100644 index dd808fe..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1988.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1988 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1988.tsv.zip" -f_name = "claims_1988.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1989.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1989.py deleted file mode 100644 index ee4209e..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1989.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1989 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1989.tsv.zip" -f_name = "claims_1989.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1990.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1990.py deleted file mode 100644 index 7ecf193..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1990.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1990 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1990.tsv.zip" -f_name = "claims_1990.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1991.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1991.py deleted file mode 100644 index 0bceb46..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1991.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1991 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1991.tsv.zip" -f_name = "claims_1991.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1992.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1992.py deleted file mode 100644 index c27c0d6..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1992.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1992 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1992.tsv.zip" -f_name = "claims_1992.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1993.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1993.py deleted file mode 100644 index 39749cb..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1993.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1993 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1993.tsv.zip" -f_name = "claims_1993.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1994.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1994.py deleted file mode 100644 index 388933c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1994.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1994 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1994.tsv.zip" -f_name = "claims_1994.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1995.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1995.py deleted file mode 100644 index 797fa2e..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1995.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1995 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1995.tsv.zip" -f_name = "claims_1995.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1996.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1996.py deleted file mode 100644 index d6dafb5..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1996.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1996 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1996.tsv.zip" -f_name = "claims_1996.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1997.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1997.py deleted file mode 100644 index eb4eb41..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1997.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1997 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1997.tsv.zip" -f_name = "claims_1997.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1998.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1998.py deleted file mode 100644 index 5c1f6c5..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1998.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1998 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1998.tsv.zip" -f_name = "claims_1998.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1999.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1999.py deleted file mode 100644 index bacab79..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_1999.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 1999 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_1999.tsv.zip" -f_name = "claims_1999.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2000.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2000.py deleted file mode 100644 index 59f4794..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2000.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2000 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2000.tsv.zip" -f_name = "claims_2000.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2001.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2001.py deleted file mode 100644 index d50944b..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2001.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2001 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2001.tsv.zip" -f_name = "claims_2001.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2002.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2002.py deleted file mode 100644 index 1a50dca..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2002.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2002 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2002.tsv.zip" -f_name = "claims_2002.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2003.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2003.py deleted file mode 100644 index 64a2ebe..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2003.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2003 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2003.tsv.zip" -f_name = "claims_2003.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2004.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2004.py deleted file mode 100644 index 116e3d5..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2004.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2004 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2004.tsv.zip" -f_name = "claims_2004.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2005.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2005.py deleted file mode 100644 index 23bc47d..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2005.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2005 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2005.tsv.zip" -f_name = "claims_2005.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2006.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2006.py deleted file mode 100644 index 170b70c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2006.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2006 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2006.tsv.zip" -f_name = "claims_2006.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2007.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2007.py deleted file mode 100644 index 26e0f31..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2007.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2007 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2007.tsv.zip" -f_name = "claims_2007.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2008.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2008.py deleted file mode 100644 index be148e1..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2008.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2008 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2008.tsv.zip" -f_name = "claims_2008.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2009.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2009.py deleted file mode 100644 index 0c815a8..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2009.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2009 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2009.tsv.zip" -f_name = "claims_2009.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2010.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2010.py deleted file mode 100644 index 225e406..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2010.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2010 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2010.tsv.zip" -f_name = "claims_2010.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2011.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2011.py deleted file mode 100644 index 7caaf7b..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2011.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2011 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2011.tsv.zip" -f_name = "claims_2011.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2012.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2012.py deleted file mode 100644 index cf7c65c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2012.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2012 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2012.tsv.zip" -f_name = "claims_2012.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2013.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2013.py deleted file mode 100644 index 30c9839..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2013.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2013 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2013.tsv.zip" -f_name = "claims_2013.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2014.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2014.py deleted file mode 100644 index a7d6faf..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2014.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2014 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2014.tsv.zip" -f_name = "claims_2014.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2015.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2015.py deleted file mode 100644 index ed368a8..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2015.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2015 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2015.tsv.zip" -f_name = "claims_2015.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2016.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2016.py deleted file mode 100644 index a0f6009..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2016.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2016 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2016.tsv.zip" -f_name = "claims_2016.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2017.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2017.py deleted file mode 100644 index 40e8fe1..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2017.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2017 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2017.tsv.zip" -f_name = "claims_2017.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2018.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2018.py deleted file mode 100644 index e9901fd..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2018.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2018 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2018.tsv.zip" -f_name = "claims_2018.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2019.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2019.py deleted file mode 100644 index f4cf4b0..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2019.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2019 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2019.tsv.zip" -f_name = "claims_2019.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2020.py b/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2020.py deleted file mode 100644 index ffaf535..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/claims/claims_2020.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for 2020 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "claims_2020.tsv.zip" -f_name = "claims_2020.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_current.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_current.py deleted file mode 100644 index 234de57..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/cpc_current.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "cpc_current.tsv.zip" -f_name = "cpc_current.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_group.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_group.py deleted file mode 100644 index 0085ba9..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/cpc_group.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "cpc_group.tsv.zip" -f_name = "cpc_group.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_subgroup.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_subgroup.py deleted file mode 100644 index fcdc590..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/cpc_subgroup.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "cpc_subgroup.tsv.zip" -f_name = "cpc_subgroup.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/cpc_subsection.py b/03_bulk_download_read_in/Python Scripts/archive/cpc_subsection.py deleted file mode 100644 index 045366a..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/cpc_subsection.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "cpc_subsection.tsv.zip" -f_name = "cpc_subsection.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/figures.py b/03_bulk_download_read_in/Python Scripts/archive/figures.py deleted file mode 100644 index ed62a0a..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/figures.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "figures.tsv.zip" -f_name = "figures.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(include='float64')) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/foreign_priority.py b/03_bulk_download_read_in/Python Scripts/archive/foreign_priority.py deleted file mode 100644 index 90b407c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/foreign_priority.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Foreign priority data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("//Dc1fs/dc1ehd/share/Science Policy Portfolio/PatentsView IV/Documentation/Tables/20200331") -file_name = "foreign_priority.tsv.zip" -f_name = "foreign_priority.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/foreigncitation.py b/03_bulk_download_read_in/Python Scripts/archive/foreigncitation.py deleted file mode 100644 index a444d48..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/foreigncitation.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Citations made to foreign patents by US patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "foreigncitation.tsv.zip" -f_name = "foreigncitation.tsv" -zf = zip.ZipFile(file_name) -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/government_interest.py b/03_bulk_download_read_in/Python Scripts/archive/government_interest.py deleted file mode 100644 index 5ed4347..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/government_interest.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw government interest statements on all patents (where available) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "government_interest.tsv.zip" -f_name = "government_interest.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/government_organization.py b/03_bulk_download_read_in/Python Scripts/archive/government_organization.py deleted file mode 100644 index 84e270d..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/government_organization.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Organization names and related agency hierarchy parsed from the government interest statements on all patents (where available) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "government_organization.tsv.zip" -f_name = "government_organization.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/inventor.py b/03_bulk_download_read_in/Python Scripts/archive/inventor.py deleted file mode 100644 index c06fb90..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/inventor.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Disambiguated inventor data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. 
Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "inventor.tsv.zip" -f_name = "inventor.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/inventor_gender.py b/03_bulk_download_read_in/Python Scripts/archive/inventor_gender.py deleted file mode 100644 index 7d0cb61..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/inventor_gender.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "inventor_gender.tsv.zip" -f_name = "inventor_gender.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/ipcr.py b/03_bulk_download_read_in/Python Scripts/archive/ipcr.py deleted file mode 100644 index 01d8c5d..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/ipcr.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for International Patent Classification data for all patents (as of publication date) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "ipcr.tsv.zip" -f_name = "ipcr.tsv" -zf = zip.ZipFile(file_name) -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/lawyer.py b/03_bulk_download_read_in/Python Scripts/archive/lawyer.py deleted file mode 100644 index b68f159..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/lawyer.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Disambiguated lawyer data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "lawyer.tsv.zip" -f_name = "lawyer.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/location.py b/03_bulk_download_read_in/Python Scripts/archive/location.py deleted file mode 100644 index 522768c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/location.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Disambiguated location data, including latitude and longitude - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "location.tsv.zip" -f_name = "location.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/location_assignee.py b/03_bulk_download_read_in/Python Scripts/archive/location_assignee.py deleted file mode 100644 index dec8c20..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/location_assignee.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Metadata table for many-to-many relationships - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "location_assignee.tsv.zip" -f_name = "location_assignee.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/mainclass.py b/03_bulk_download_read_in/Python Scripts/archive/mainclass.py deleted file mode 100644 index 647085c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/mainclass.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "mainclass.tsv.zip" -f_name = "mainclass.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/mainclass_current.py b/03_bulk_download_read_in/Python Scripts/archive/mainclass_current.py deleted file mode 100644 index 6414842..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/mainclass_current.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "mainclass_current.tsv.zip" -f_name = "mainclass_current.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/nber.py b/03_bulk_download_read_in/Python Scripts/archive/nber.py deleted file mode 100644 index 4044f8f..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/nber.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "nber.tsv.zip" -f_name = "nber.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/nber_category.py b/03_bulk_download_read_in/Python Scripts/archive/nber_category.py deleted file mode 100644 index 7ed96b0..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/nber_category.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "nber_category.tsv.zip" -f_name = "nber_category.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/nber_subcategory.py b/03_bulk_download_read_in/Python Scripts/archive/nber_subcategory.py deleted file mode 100644 index c641240..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/nber_subcategory.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "nber_subcategory.tsv.zip" -f_name = "nber_subcategory.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/non_inventor_applicant.py b/03_bulk_download_read_in/Python Scripts/archive/non_inventor_applicant.py deleted file mode 100644 index 5e7e7ad..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/non_inventor_applicant.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "non_inventor_applicant.tsv.zip" -f_name = "non_inventor_applicant.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/otherreference.py b/03_bulk_download_read_in/Python Scripts/archive/otherreference.py deleted file mode 100644 index d45f8e2..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/otherreference.py +++ /dev/null @@ -1,32 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "otherreference.tsv.zip" -f_name = "otherreference.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -dtype={'sequence': int} -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent.py b/03_bulk_download_read_in/Python Scripts/archive/patent.py deleted file mode 100644 index 6f31ed7..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/patent.py +++ /dev/null @@ -1,32 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "patent.tsv.zip" -f_name = "patent.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -dtype={'sequence': int} -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_contractawardnumber.py b/03_bulk_download_read_in/Python Scripts/archive/patent_contractawardnumber.py deleted file mode 100644 index 8549df3..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/patent_contractawardnumber.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Contract or award numbers parsed from the government interest statements on all patents (where available) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "patent_contractawardnumber.tsv.zip" -f_name = "patent_contractawardnumber.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_govintorg.py b/03_bulk_download_read_in/Python Scripts/archive/patent_govintorg.py deleted file mode 100644 index 687ce95..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/patent_govintorg.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Metadata table with patent-to-organization relationships linked to the government_organization table - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "patent_govintorg.tsv.zip" -f_name = "patent_govintorg.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_inventor.py b/03_bulk_download_read_in/Python Scripts/archive/patent_inventor.py deleted file mode 100644 index f3f91a8..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/patent_inventor.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Metadata table for many-to-many relationships - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "patent_inventor.tsv.zip" -f_name = "patent_inventor.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/patent_lawyer.py b/03_bulk_download_read_in/Python Scripts/archive/patent_lawyer.py deleted file mode 100644 index 086a5ac..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/patent_lawyer.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for Metadata table for many-to-many relationships - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "patent_lawyer.tsv.zip" -f_name = "patent_lawyer.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/pct_data.py b/03_bulk_download_read_in/Python Scripts/archive/pct_data.py deleted file mode 100644 index 59d6a69..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/pct_data.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for PCT data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "pct_data.tsv.zip" -f_name = "pct_data.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/persistent_assignee_disambig.py b/03_bulk_download_read_in/Python Scripts/archive/persistent_assignee_disambig.py deleted file mode 100644 index 01aa9f0..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/persistent_assignee_disambig.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Persistant Assignee Disambiguation - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "persistent_assignee_disambig.tsv.zip" -f_name = "persistent_assignee_disambig.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of columns, observations, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/persistent_inventor_disambig.py b/03_bulk_download_read_in/Python Scripts/archive/persistent_inventor_disambig.py deleted file mode 100644 index 36124e9..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/persistent_inventor_disambig.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Persistant Inventor Disambiguation - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "persistent_inventor_disambig.tsv.zip" -f_name = "persistent_inventor_disambig.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawassignee.py b/03_bulk_download_read_in/Python Scripts/archive/rawassignee.py deleted file mode 100644 index 90b9ff4..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/rawassignee.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw assignee information as it appears in the source text and XML files - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "rawassignee.tsv.zip" -f_name = "rawassignee.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawexaminer.py b/03_bulk_download_read_in/Python Scripts/archive/rawexaminer.py deleted file mode 100644 index 3ddfc53..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/rawexaminer.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw examiner information - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "rawexaminer.tsv.zip" -f_name = "rawexaminer.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawinventor.py b/03_bulk_download_read_in/Python Scripts/archive/rawinventor.py deleted file mode 100644 index 32bba30..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/rawinventor.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw inventor information as it appears in the source text and XML files - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "rawinventor.tsv.zip" -f_name = "rawinventor.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawlawyer.py b/03_bulk_download_read_in/Python Scripts/archive/rawlawyer.py deleted file mode 100644 index 43f7b7b..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/rawlawyer.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "rawlawyer.tsv.zip" -f_name = "rawlawyer.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/rawlocation.py b/03_bulk_download_read_in/Python Scripts/archive/rawlocation.py deleted file mode 100644 index 89b1f17..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/rawlocation.py +++ /dev/null @@ -1,32 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "rawlocation.tsv.zip" -f_name = "rawlocation.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -dtype={'sequence': int} -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting= csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/archive/rel_app_text.py b/03_bulk_download_read_in/Python Scripts/archive/rel_app_text.py deleted file mode 100644 index 9fe0cc4..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/rel_app_text.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "rel_app_text.tsv.zip" -f_name = "rel_app_text.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/subclass.py b/03_bulk_download_read_in/Python Scripts/archive/subclass.py deleted file mode 100644 index 07603dd..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/subclass.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "subclass.tsv.zip" -f_name = "subclass.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/subclass_current.py b/03_bulk_download_read_in/Python Scripts/archive/subclass_current.py deleted file mode 100644 index 1b37cc0..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/subclass_current.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "subclass_current.tsv.zip" -f_name = "subclass_current.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/archive/us_term_of_grant.py b/03_bulk_download_read_in/Python Scripts/archive/us_term_of_grant.py deleted file mode 100644 index 38d86cb..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/us_term_of_grant.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for U.S. term of grant data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "us_term_of_grant.tsv.zip" -f_name = "us_term_of_grant.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/usapplicationcitation.py b/03_bulk_download_read_in/Python Scripts/archive/usapplicationcitation.py deleted file mode 100644 index f1dd96c..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/usapplicationcitation.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Citations made to US patent applications by US patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "usapplicationcitation.tsv.zip" -f_name = "usapplicationcitation.tsv" -zf = zip.ZipFile(file_name) -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/uspatentcitation.py b/03_bulk_download_read_in/Python Scripts/archive/uspatentcitation.py deleted file mode 100644 index 5272086..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/uspatentcitation.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Citations made to US granted patents by US patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "uspatentcitation.tsv.zip" -f_name = "uspatentcitation.tsv" -zf = zip.ZipFile(file_name) -chunksize = 2*(10 ** 6) -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/uspc.py b/03_bulk_download_read_in/Python Scripts/archive/uspc.py deleted file mode 100644 index 7656429..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/uspc.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for USPC classification data for all patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "uspc.tsv.zip" -f_name = "uspc.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/uspc_current.py b/03_bulk_download_read_in/Python Scripts/archive/uspc_current.py deleted file mode 100644 index 3847dae..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/uspc_current.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Current USPC classification data for all patents up to May 2015 - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "uspc_current.tsv.zip" -f_name = "uspc_current.tsv" -zf = zip.ZipFile(file_name) -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/archive/usreldoc.py b/03_bulk_download_read_in/Python Scripts/archive/usreldoc.py deleted file mode 100644 index 6c238f9..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/usreldoc.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for U.S. related documents (post-2005 patents only) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "usreldoc.tsv.zip" -f_name = "usreldoc.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/wipo.py b/03_bulk_download_read_in/Python Scripts/archive/wipo.py deleted file mode 100644 index 4e9e970..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/wipo.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for WIPO technology fields for all patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "wipo.tsv.zip" -f_name = "wipo.tsv" -zf = zip.ZipFile(file_name) -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) - - - diff --git a/03_bulk_download_read_in/Python Scripts/archive/wipo_field.py b/03_bulk_download_read_in/Python Scripts/archive/wipo_field.py deleted file mode 100644 index 8df42ff..0000000 --- a/03_bulk_download_read_in/Python Scripts/archive/wipo_field.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Lookup table of WIPO technology fields - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "wipo_field.tsv.zip" -f_name = "wipo_field.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -df = pd.read_csv(zf.open(f_name), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/g_applicant_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_applicant_not_disambiguated.py deleted file mode 100644 index d5b42dc..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_applicant_not_disambiguated.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_applicant_not_disambiguated.tsv.zip" -f_name = "g_applicant_not_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. 
- with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_application.py b/03_bulk_download_read_in/Python Scripts/g_application.py deleted file mode 100644 index c8d062b..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_application.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_application.tsv.zip" -f_name = "g_application.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_assignee_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_assignee_disambiguated.py deleted file mode 100644 index dec2f55..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_assignee_disambiguated.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_assignee_disambiguated.tsv.zip" -f_name = "g_assignee_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_assignee_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_assignee_not_disambiguated.py deleted file mode 100644 index 7eb3a85..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_assignee_not_disambiguated.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw assignee information as it appears in the source text and XML files - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "g_assignee_not_disambiguated.tsv.zip" -f_name = "g_assignee_not_disambiguated.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_attorney_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_attorney_disambiguated.py deleted file mode 100644 index 69be381..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_attorney_disambiguated.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for Disambiguated lawyer data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "g_attorney_disambiguated.tsv.zip" -f_name = "g_attorney_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/g_attorney_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_attorney_not_disambiguated.py deleted file mode 100644 index 9247fb8..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_attorney_not_disambiguated.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_attorney_not_disambiguated.tsv.zip" -f_name = "g_attorney_not_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_botanic.py b/03_bulk_download_read_in/Python Scripts/g_botanic.py deleted file mode 100644 index db37c46..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_botanic.py +++ /dev/null @@ -1,22 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_botanic.tsv.zip" -f_name = "g_botanic.tsv" -# Selecting the zip file. 
-with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/g_cpc_title.py b/03_bulk_download_read_in/Python Scripts/g_cpc_title.py deleted file mode 100644 index 01fd94e..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_cpc_title.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_cpc_title.tsv.zip" -f_name = "g_cpc_title.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_examiner_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_examiner_not_disambiguated.py deleted file mode 100644 index 39e7553..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_examiner_not_disambiguated.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw examiner information - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. 
Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "g_examiner_not_disambiguated.tsv.zip" -f_name = "g_examiner_not_disambiguated.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_figures.py b/03_bulk_download_read_in/Python Scripts/g_figures.py deleted file mode 100644 index a3fbdea..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_figures.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "g_figures.tsv.zip" -f_name = "g_figures.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(include='float64')) - - diff --git a/03_bulk_download_read_in/Python Scripts/g_foreign_citation.py b/03_bulk_download_read_in/Python Scripts/g_foreign_citation.py deleted file mode 100644 index 06286c0..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_foreign_citation.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Citations made to foreign patents by US patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "g_foreign_citation.tsv.zip" -f_name = "g_foreign_citation.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 15*(10 ** 5) - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_foreign_priority.py b/03_bulk_download_read_in/Python Scripts/g_foreign_priority.py deleted file mode 100644 index 964fca9..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_foreign_priority.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Foreign priority data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_foreign_priority.tsv.zip" -f_name = "g_foreign_priority.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. 
- with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_gov_interest.py b/03_bulk_download_read_in/Python Scripts/g_gov_interest.py deleted file mode 100644 index d635334..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_gov_interest.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for Raw government interest statements on all patents (where available) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "g_gov_interest.tsv.zip" -f_name = "g_gov_interest.tsv" - -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: - -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -print(df.head()) - -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() - -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_gov_interest_org.py b/03_bulk_download_read_in/Python Scripts/g_gov_interest_org.py deleted file mode 100644 index 01b2695..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_gov_interest_org.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for Metadata table with patent-to-organization relationships linked to the government_organization table - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "g_gov_interest_org.tsv.zip" -f_name = "g_gov_interest_org.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_inventor_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_inventor_disambiguated.py deleted file mode 100644 index 1a406d6..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_inventor_disambiguated.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Metadata table for many-to-many relationships - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. 
Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "g_inventor_disambiguated.tsv.zip" -f_name = "g_inventor_disambiguated.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_inventor_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_inventor_not_disambiguated.py deleted file mode 100644 index a416d9c..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_inventor_not_disambiguated.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw inventor information as it appears in the source text and XML files - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "g_inventor_not_disambiguated.tsv.zip" -f_name = "g_inventor_not_disambiguated.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_ipc_at_issue.py b/03_bulk_download_read_in/Python Scripts/g_ipc_at_issue.py deleted file mode 100644 index 12a39a3..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_ipc_at_issue.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for International Patent Classification data for all patents (as of publication date) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "g_ipc_at_issue.tsv.zip" -f_name = "g_ipc_at_issue.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 15*(10 ** 5) - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_location_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_location_disambiguated.py deleted file mode 100644 index b23e0c8..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_location_disambiguated.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for Disambiguated location data, including latitude and longitude - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "g_location_disambiguated.tsv.zip" -f_name = "g_location_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/g_location_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/g_location_not_disambiguated.py deleted file mode 100644 index a92743e..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_location_not_disambiguated.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_location_not_disambiguated.tsv.zip" -f_name = "g_location_not_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: - chunksize = 15*(10 ** 5) - count = 1 - n_obs = 0 - dtype={'sequence': int} - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting= csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/g_other_reference.py b/03_bulk_download_read_in/Python Scripts/g_other_reference.py deleted file mode 100644 index f1a1989..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_other_reference.py +++ /dev/null @@ -1,33 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_other_reference.tsv.zip" -f_name = "g_other_reference.tsv" -# Selecting the zip file. 
-with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -chunksize = 15*(10 ** 5) -count = 1 -n_obs = 0 -dtype={'sequence': int} -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/g_patent.py b/03_bulk_download_read_in/Python Scripts/g_patent.py deleted file mode 100644 index 733472e..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_patent.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_patent.tsv.zip" -f_name = "g_patent.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: - chunksize = 15*(10 ** 5) - count = 1 - n_obs = 0 - dtype={'sequence': int} - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/g_pct_data.py b/03_bulk_download_read_in/Python Scripts/g_pct_data.py deleted file mode 100644 index 46034c2..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_pct_data.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for PCT data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "g_pct_data.tsv.zip" -f_name = "g_pct_data.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_persistent_assignee.py b/03_bulk_download_read_in/Python Scripts/g_persistent_assignee.py deleted file mode 100644 index 4404169..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_persistent_assignee.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Persistant Assignee Disambiguation - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "g_persistent_assignee.tsv.zip" -f_name = "g_persistent_assignee.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of columns, observations, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_persistent_inventor.py b/03_bulk_download_read_in/Python Scripts/g_persistent_inventor.py deleted file mode 100644 index a3f8cab..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_persistent_inventor.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Persistant Inventor Disambiguation - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "g_persistent_inventor.tsv.zip" -f_name = "g_persistent_inventor.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_rel_app_text.py b/03_bulk_download_read_in/Python Scripts/g_rel_app_text.py deleted file mode 100644 index 5845e89..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_rel_app_text.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "g_rel_app_text.tsv.zip" -f_name = "g_rel_app_text.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/g_us_application_citation.py b/03_bulk_download_read_in/Python Scripts/g_us_application_citation.py deleted file mode 100644 index 9ce0206..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_us_application_citation.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Citations made to US patent applications by US patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "g_us_application_citation.tsv.zip" -f_name = "g_us_application_citation.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 15*(10 ** 5) - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_us_patent_citation.py b/03_bulk_download_read_in/Python Scripts/g_us_patent_citation.py deleted file mode 100644 index b0f0681..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_us_patent_citation.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for Citations made to US granted patents by US patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "g_us_patent_citation.tsv.zip" -f_name = "g_us_patent_citation.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 2*(10 ** 6) - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) - - - diff --git a/03_bulk_download_read_in/Python Scripts/g_us_rel_doc.py b/03_bulk_download_read_in/Python Scripts/g_us_rel_doc.py deleted file mode 100644 index afc1ff1..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_us_rel_doc.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for U.S. related documents (post-2005 patents only) - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "g_us_rel_doc.tsv.zip" -f_name = "g_us_rel_doc.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) - - - diff --git a/03_bulk_download_read_in/Python Scripts/g_us_term_of_grant.py b/03_bulk_download_read_in/Python Scripts/g_us_term_of_grant.py deleted file mode 100644 index 9e6d9ae..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_us_term_of_grant.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for U.S. term of grant data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "g_us_term_of_grant.tsv.zip" -f_name = "g_us_term_of_grant.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) - - diff --git a/03_bulk_download_read_in/Python Scripts/g_uspc_at_issue.py b/03_bulk_download_read_in/Python Scripts/g_uspc_at_issue.py deleted file mode 100644 index 79cbad3..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_uspc_at_issue.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for USPC classification data for all patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "g_uspc_at_issue.tsv.zip" -f_name = "g_uspc_at_issue.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/g_wipo_technology.py b/03_bulk_download_read_in/Python Scripts/g_wipo_technology.py deleted file mode 100644 index 9435a51..0000000 --- a/03_bulk_download_read_in/Python Scripts/g_wipo_technology.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for WIPO technology fields for all patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "g_wipo_technology.tsv.zip" -f_name = "g_wipo_technology.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) - - - diff --git a/03_bulk_download_read_in/Python Scripts/pg_applicant_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_applicant_not_disambiguated.py deleted file mode 100644 index 19449a2..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_applicant_not_disambiguated.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_applicant_not_disambiguated.tsv.zip" -f_name = "pg_applicant_not_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. 
- with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_assignee_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_assignee_disambiguated.py deleted file mode 100644 index c52ca98..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_assignee_disambiguated.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_assignee_disambiguated.tsv.zip" -f_name = "pg_assignee_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. 
- with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_assignee_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_assignee_not_disambiguated.py deleted file mode 100644 index a49f576..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_assignee_not_disambiguated.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw assignee information as it appears in the source text and XML files - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "pg_assignee_not_disambiguated.tsv.zip" -f_name = "pg_assignee_not_disambiguated.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/pg_cpc_at_issue.py b/03_bulk_download_read_in/Python Scripts/pg_cpc_at_issue.py deleted file mode 100644 index cbd8097..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_cpc_at_issue.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_cpc_at_issue.tsv.zip" -f_name = "pg_cpc_at_issue.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_cpc_current.py b/03_bulk_download_read_in/Python Scripts/pg_cpc_current.py deleted file mode 100644 index 9d9ed78..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_cpc_current.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_cpc_current.tsv.zip" -f_name = "pg_cpc_current.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. 
- with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_cpc_title.py b/03_bulk_download_read_in/Python Scripts/pg_cpc_title.py deleted file mode 100644 index 9cda79e..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_cpc_title.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_cpc_title.tsv.zip" -f_name = "pg_cpc_title.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_foreign_priority.py b/03_bulk_download_read_in/Python Scripts/pg_foreign_priority.py deleted file mode 100644 index 7c50401..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_foreign_priority.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for Foreign priority data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "pg_foreign_priority.tsv.zip" -f_name = "pg_foreign_priority.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_gov_interest.py b/03_bulk_download_read_in/Python Scripts/pg_gov_interest.py deleted file mode 100644 index cf4f32c..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_gov_interest.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for Raw government interest statements on all patents (where available) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "pg_gov_interest.tsv.zip" -f_name = "pg_gov_interest.tsv" - -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: - -# Reading the selected file in the zip. 
- with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -print(df.head()) - -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() - -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_granted_pgpubs_crosswalk.py b/03_bulk_download_read_in/Python Scripts/pg_granted_pgpubs_crosswalk.py deleted file mode 100644 index 907752e..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_granted_pgpubs_crosswalk.py +++ /dev/null @@ -1,22 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_granted_pgpubs_crosswalk.tsv.zip" -f_name = "pg_granted_pgpubs_crosswalk.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/pg_inventor_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_inventor_disambiguated.py deleted file mode 100644 index ec5926c..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_inventor_disambiguated.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for Disambiguated inventor data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "pg_inventor_disambiguated.tsv.zip" -f_name = "pg_inventor_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_inventor_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_inventor_not_disambiguated.py deleted file mode 100644 index e6b5776..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_inventor_not_disambiguated.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for Raw inventor information as it appears in the source text and XML files - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "pg_inventor_not_disambiguated.tsv.zip" -f_name = "pg_inventor_not_disambiguated.tsv" -with zip.ZipFile(file_name) as zf: -chunksize = 10 ** 6 -count = 1 -n_obs = 0 -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/pg_ipc_at_issue.py b/03_bulk_download_read_in/Python Scripts/pg_ipc_at_issue.py deleted file mode 100644 index b97e1d4..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_ipc_at_issue.py +++ /dev/null @@ -1,25 +0,0 @@ -#Read-in script for International Patent Classification data for all patents (as of publication date) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "pg_ipc_at_issue.tsv.zip" -f_name = "pg_ipc_at_issue.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 15*(10 ** 5) - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/pg_location_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_location_disambiguated.py deleted file mode 100644 index 994464e..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_location_disambiguated.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for Disambiguated location data, including latitude and longitude - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "pg_location_disambiguated.tsv.zip" -f_name = "pg_location_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. 
-print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/pg_location_not_disambiguated.py b/03_bulk_download_read_in/Python Scripts/pg_location_not_disambiguated.py deleted file mode 100644 index d11d3d7..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_location_not_disambiguated.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_location_not_disambiguated.tsv.zip" -f_name = "pg_location_not_disambiguated.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: - chunksize = 15*(10 ** 5) - count = 1 - n_obs = 0 - dtype={'sequence': int} - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting= csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) -print(df.describe(exclude=[np.number])) diff --git a/03_bulk_download_read_in/Python Scripts/pg_pct_data.py b/03_bulk_download_read_in/Python Scripts/pg_pct_data.py deleted file mode 100644 index 9d5ccfa..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_pct_data.py +++ /dev/null @@ -1,26 +0,0 @@ -#Read-in script for PCT data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "pg_pct_data.tsv.zip" -f_name = "pg_pct_data.tsv" -# Selecting the zip file. 
-with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) -# Print first five observations -print(df.head()) -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Provide additional information on certain variables. -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_published_application.py b/03_bulk_download_read_in/Python Scripts/pg_published_application.py deleted file mode 100644 index 245cc02..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_published_application.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_published_application.tsv.zip" -f_name = "pg_published_application.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. 
- with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_rel_app_text.py b/03_bulk_download_read_in/Python Scripts/pg_rel_app_text.py deleted file mode 100644 index 3216619..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_rel_app_text.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for Number of figures and sheets - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "pg_rel_app_text.tsv.zip" -f_name = "pg_rel_app_text.tsv" -# Selecting the zip file. -with zip.ZipFile(file_name) as zf: -# Reading the selected file in the zip. - with zf.open(f_name) as openfile: - df = pd.read_csv(openfile, delimiter="\t", quoting = csv.QUOTE_NONNUMERIC) - -# Print first five observations -df.head() -# Print summary of data: number of columns, observations, and each variable data type -print(len(df)) -df.info() -# Print basic summary statistics for numerical variables -print(df.describe(exclude=[np.number])) \ No newline at end of file diff --git a/03_bulk_download_read_in/Python Scripts/pg_uspc_at_issue.py b/03_bulk_download_read_in/Python Scripts/pg_uspc_at_issue.py deleted file mode 100644 index c524133..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_uspc_at_issue.py +++ /dev/null @@ -1,24 +0,0 @@ -#Read-in script for USPC classification data for all patents - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "pg_uspc_at_issue.tsv.zip" -f_name = "pg_uspc_at_issue.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) diff --git a/03_bulk_download_read_in/Python Scripts/pg_wipo_technology.py b/03_bulk_download_read_in/Python Scripts/pg_wipo_technology.py deleted file mode 100644 index d5657d5..0000000 --- a/03_bulk_download_read_in/Python Scripts/pg_wipo_technology.py +++ /dev/null @@ -1,27 +0,0 @@ -#Read-in script for WIPO technology fields for all patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. 
-file_name = "pg_wipo_technology.tsv.zip" -f_name = "pg_wipo_technology.tsv" -with zip.ZipFile(file_name) as zf: - chunksize = 10 ** 6 - count = 1 - n_obs = 0 - for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) - - - diff --git a/03_bulk_download_read_in/R Scripts/README.md b/03_bulk_download_read_in/R Scripts/README.md deleted file mode 100644 index 8d6e135..0000000 --- a/03_bulk_download_read_in/R Scripts/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# PatentsView-Code-Snippets - -# Bulk Download Files: R Read-in Scripts - -Below is a list of all bulk download files and information on whether or not there is a template read-in script currently available. - -If the script for a file is not currently available, other scripts in this repository can be used as reference. The files are all structured in the same manner so you should be able to use a template from a different file to help determine how to proceed. 
- - - -List of Resources: - -All R scripts were created using R 3.5.2 and R 4.0 - -Necessary Packages: - -- data.table - - -| Bulk Download File | Status of Script | -| --- |--- | -| application | *Available* | -| assignee | *Available* | -| botanic | *Available* | -| brf_sum_text | *In Progress* | -| claim | *Available* | -| cpc_current | *Available* | -| cpc_group | *Available* | -| cpc_subgroup | *Available* | -| cpc_subsection | *Available* | -| draw_desc_text | *In Progress* | -| detail_desc_text | *In Progress* | -| foreign_priority | *Available* | -| figures | *Available* | -| foreigncitation | *Available* | -| government_interest | *Available* | -| government_organization | *Available* | -| inventor | *Available* | -| inventor_gender | *Available* | -| ipcr | *Available* | -| lawyer | *Available* | -| location | *Available* | -| location_assignee | *Available* | -| location_inventor | *In Progress* | -| mainclass | *Available* | -| mainclass_current | *Available* | -| nber | *Available* | -| nber_category | *Available* | -| nber_subcategory | *Available* | -| non_inventor_applicant | *Available* | -| otherreference | *Available* | -| patent | *Available* | -| patent_assignee | *In Progress* | -| patent_contractawardnumber | *Available* | -| patent_govintorg | *Available* | -| patent_inventor | *Available* | -| patent_lawyer | *Available* | -| pct_data | *Available* | -| persistent_assignee_disambig | *Available* | -| persistent_inventor_disambig | *Available* | -| rawassignee | *Available* | -| rawexaminer | *Available* | -| rawinventor | *Available* | -| rawlawyer | *Available* | -| rawlocation | *Available* | -| rel_app_text | *Available* | -| subclass | *Available* | -| subclass_current | *Available* | -| us_term_of_grant | *Available* | -| usapplicationcitation | *In Progress* | -| uspatentcitation | *In Progress* | -| uspc | *Available* | -| uspc_current | *Available* | -| usreldoc | *Available* | -| wipo | *Available* | -| wipo_field | *Available* | \ 
No newline at end of file diff --git a/03_bulk_download_read_in/R Scripts/archive/assignee.rmd b/03_bulk_download_read_in/R Scripts/archive/assignee.rmd deleted file mode 100644 index a971b18..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/assignee.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for assignee data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("assignee.tsv.zip", "assignee.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/botanic.rmd b/03_bulk_download_read_in/R Scripts/archive/botanic.rmd deleted file mode 100644 index 5679f67..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/botanic.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for botanic data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("botanic.tsv.zip", "botanic.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1976.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1976.Rmd deleted file mode 100644 index eb5b2ad..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1976.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1976 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1976.tsv.zip", "claims_1976.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1977.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1977.Rmd deleted file mode 100644 index 51869ce..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1977.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1977 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1977.tsv.zip", "claims_1977.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1978.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1978.Rmd deleted file mode 100644 index 1d3df6e..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1978.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1978 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1978.tsv.zip", "claims_1978.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1979.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1979.Rmd deleted file mode 100644 index b3efe50..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1979.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1979 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1979.tsv.zip", "claims_1979.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1980.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1980.Rmd deleted file mode 100644 index 3951c4a..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1980.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1980 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1980.tsv.zip", "claims_1980.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1981.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1981.Rmd deleted file mode 100644 index 106eab3..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1981.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1981 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1981.tsv.zip", "claims_1981.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1982.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1982.Rmd deleted file mode 100644 index aaceec3..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1982.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1982 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1982.tsv.zip", "claims_1982.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1983.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1983.Rmd deleted file mode 100644 index 42cf546..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1983.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1983 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1983.tsv.zip", "claims_1983.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1984.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1984.Rmd deleted file mode 100644 index 8c3245e..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1984.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1984 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1984.tsv.zip", "claims_1984.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1985.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1985.Rmd deleted file mode 100644 index 014d9dc..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1985.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1985 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1985.tsv.zip", "claims_1985.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1986.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1986.Rmd deleted file mode 100644 index 77bb994..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1986.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1986 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1986.tsv.zip", "claims_1986.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1987.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1987.Rmd deleted file mode 100644 index f7a4e31..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1987.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1987 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1987.tsv.zip", "claims_1987.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1988.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1988.Rmd deleted file mode 100644 index 2a9fb76..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1988.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1988 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1988.tsv.zip", "claims_1988.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1989.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1989.Rmd deleted file mode 100644 index 815952c..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1989.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1989 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1989.tsv.zip", "claims_1989.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1990.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1990.Rmd deleted file mode 100644 index c437037..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1990.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1990 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1990.tsv.zip", "claims_1990.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1991.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1991.Rmd deleted file mode 100644 index b7f44ce..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1991.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1991 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1991.tsv.zip", "claims_1991.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1992.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1992.Rmd deleted file mode 100644 index b8d8e54..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1992.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1992 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1992.tsv.zip", "claims_1992.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1993.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1993.Rmd deleted file mode 100644 index 8f46b84..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1993.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1993 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1993.tsv.zip", "claims_1993.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1994.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1994.Rmd deleted file mode 100644 index 621a82a..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1994.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1994 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1994.tsv.zip", "claims_1994.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1995.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1995.Rmd deleted file mode 100644 index 8564015..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1995.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1995 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1995.tsv.zip", "claims_1995.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1996.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1996.Rmd deleted file mode 100644 index bf6fa74..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1996.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1996 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1996.tsv.zip", "claims_1996.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1997.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1997.Rmd deleted file mode 100644 index 358e9f1..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1997.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1997 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1997.tsv.zip", "claims_1997.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1998.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1998.Rmd deleted file mode 100644 index 13bff46..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1998.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1998 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1998.tsv.zip", "claims_1998.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1999.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_1999.Rmd deleted file mode 100644 index 264c005..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_1999.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 1999 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_1999.tsv.zip", "claims_1999.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2000.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2000.Rmd deleted file mode 100644 index eab6314..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2000.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2000 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2000.tsv.zip", "claims_2000.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2001.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2001.Rmd deleted file mode 100644 index 9d357ef..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2001.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2001 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2001.tsv.zip", "claims_2001.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2002.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2002.Rmd deleted file mode 100644 index 97542b0..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2002.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2002 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2002.tsv.zip", "claims_2002.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2003.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2003.Rmd deleted file mode 100644 index 7967c8d..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2003.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2003 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2003.tsv.zip", "claims_2003.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2004.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2004.Rmd deleted file mode 100644 index 4076ceb..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2004.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2004 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2004.tsv.zip", "claims_2004.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2005.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2005.Rmd deleted file mode 100644 index 2947ad7..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2005.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2005 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2005.tsv.zip", "claims_2005.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2006.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2006.Rmd deleted file mode 100644 index bd9fb5f..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2006.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2006 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2006.tsv.zip", "claims_2006.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2007.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2007.Rmd deleted file mode 100644 index 7020d50..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2007.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2007 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2007.tsv.zip", "claims_2007.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2008.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2008.Rmd deleted file mode 100644 index 5bdf2d7..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2008.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2008 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2008.tsv.zip", "claims_2008.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2009.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2009.Rmd deleted file mode 100644 index feefdc3..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2009.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2009 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2009.tsv.zip", "claims_2009.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2010.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2010.Rmd deleted file mode 100644 index 6aa86f4..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2010.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2010 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2010.tsv.zip", "claims_2010.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2011.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2011.Rmd deleted file mode 100644 index bc485a8..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2011.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2011 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2011.tsv.zip", "claims_2011.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2012.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2012.Rmd deleted file mode 100644 index 4c4d00b..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2012.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2012 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2012.tsv.zip", "claims_2012.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2013.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2013.Rmd deleted file mode 100644 index d64f61c..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2013.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2013 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2013.tsv.zip", "claims_2013.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2014.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2014.Rmd deleted file mode 100644 index 2b7b32a..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2014.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2014 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2014.tsv.zip", "claims_2014.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2015.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2015.Rmd deleted file mode 100644 index e202fc1..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2015.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2015 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2015.tsv.zip", "claims_2015.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2016.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2016.Rmd deleted file mode 100644 index 1f458ed..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2016.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2016 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2016.tsv.zip", "claims_2016.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2017.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2017.Rmd deleted file mode 100644 index c4a60c4..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2017.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2017 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2017.tsv.zip", "claims_2017.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2018.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2018.Rmd deleted file mode 100644 index 871d569..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2018.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2018 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2018.tsv.zip", "claims_2018.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2019.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2019.Rmd deleted file mode 100644 index c4bd876..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2019.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2019 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2019.tsv.zip", "claims_2019.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2020.Rmd b/03_bulk_download_read_in/R Scripts/archive/claims/claims_2020.Rmd deleted file mode 100644 index c27fc25..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/claims/claims_2020.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for 2020 claims data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("claims_2020.tsv.zip", "claims_2020.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_current.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_current.rmd deleted file mode 100644 index 1366d3d..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/cpc_current.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_current data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("cpc_current.tsv.zip", "cpc_current.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_group.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_group.rmd deleted file mode 100644 index ed24e64..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/cpc_group.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_group data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("cpc_group.tsv.zip", "cpc_group.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_subgroup.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_subgroup.rmd deleted file mode 100644 index 57d8554..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/cpc_subgroup.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_subgroup data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("cpc_subgroup.tsv.zip", "cpc_subgroup.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/cpc_subsection.rmd b/03_bulk_download_read_in/R Scripts/archive/cpc_subsection.rmd deleted file mode 100644 index fb65cc6..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/cpc_subsection.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_subsection data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("cpc_subsection.tsv.zip", "cpc_subsection.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/figures.Rmd b/03_bulk_download_read_in/R Scripts/archive/figures.Rmd deleted file mode 100644 index 092ddd8..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/figures.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for figures data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("figures.tsv.zip", "figures.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/foreign_priority.Rmd b/03_bulk_download_read_in/R Scripts/archive/foreign_priority.Rmd deleted file mode 100644 index 1f55108..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/foreign_priority.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for foreign_priority data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("foreign_priority.tsv.zip", "foreign_priority.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/foreigncitation.Rmd b/03_bulk_download_read_in/R Scripts/archive/foreigncitation.Rmd deleted file mode 100644 index 9ee92da..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/foreigncitation.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for foreign citation data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("foreigncitation.tsv.zip", "foreigncitation.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/government_interest.Rmd b/03_bulk_download_read_in/R Scripts/archive/government_interest.Rmd deleted file mode 100644 index 0e92cb1..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/government_interest.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for government_interest data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("government_interest.tsv.zip", "government_interest.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/government_organization.Rmd b/03_bulk_download_read_in/R Scripts/archive/government_organization.Rmd deleted file mode 100644 index efb2775..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/government_organization.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for government_organization data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("government_organization.tsv.zip", "government_organization.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/inventor.Rmd b/03_bulk_download_read_in/R Scripts/archive/inventor.Rmd deleted file mode 100644 index 56d42ee..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/inventor.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("inventor.tsv.zip", "inventor.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/inventor_gender.rmd b/03_bulk_download_read_in/R Scripts/archive/inventor_gender.rmd deleted file mode 100644 index 10e5b26..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/inventor_gender.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for inventor_gender data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("inventor_gender.tsv.zip", "inventor_gender.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/ipcr.Rmd b/03_bulk_download_read_in/R Scripts/archive/ipcr.Rmd deleted file mode 100644 index b34cceb..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/ipcr.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for ipcr data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("ipcr.tsv.zip", "ipcr.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/lawyer.Rmd b/03_bulk_download_read_in/R Scripts/archive/lawyer.Rmd deleted file mode 100644 index 136aa95..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/lawyer.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for lawyer data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("lawyer.tsv.zip", "lawyer.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/location.Rmd b/03_bulk_download_read_in/R Scripts/archive/location.Rmd deleted file mode 100644 index f6b8261..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/location.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for location data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("location.tsv.zip", "location.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/location_assignee.Rmd b/03_bulk_download_read_in/R Scripts/archive/location_assignee.Rmd deleted file mode 100644 index 8d45d29..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/location_assignee.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for location_assignee data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("location_assignee.tsv.zip", "location_assignee.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/mainclass.rmd b/03_bulk_download_read_in/R Scripts/archive/mainclass.rmd deleted file mode 100644 index ee2881b..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/mainclass.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for main class data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("mainclass.tsv.zip", "mainclass.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/mainclass_current.Rmd b/03_bulk_download_read_in/R Scripts/archive/mainclass_current.Rmd deleted file mode 100644 index 27bf95a..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/mainclass_current.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for mainclass_current data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("mainclass_current.tsv.zip", "mainclass_current.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/nber.rmd b/03_bulk_download_read_in/R Scripts/archive/nber.rmd deleted file mode 100644 index 891ed45..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/nber.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for nber data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("nber.tsv.zip", "nber.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/nber_category.rmd b/03_bulk_download_read_in/R Scripts/archive/nber_category.rmd deleted file mode 100644 index 29e82a9..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/nber_category.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for nber_category data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("nber_category.tsv.zip", "nber_category.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/nber_subcategory.rmd b/03_bulk_download_read_in/R Scripts/archive/nber_subcategory.rmd deleted file mode 100644 index bb41d7d..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/nber_subcategory.rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for nber_subcategory data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("nber_subcategory.tsv.zip", "nber_subcategory.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/non_inventor_applicant.Rmd b/03_bulk_download_read_in/R Scripts/archive/non_inventor_applicant.Rmd deleted file mode 100644 index 6b6e0f8..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/non_inventor_applicant.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for non_inventor_applicant data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("non_inventor_applicant.tsv.zip", "non_inventor_applicant.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/otherreference.Rmd b/03_bulk_download_read_in/R Scripts/archive/otherreference.Rmd deleted file mode 100644 index 8d15609..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/otherreference.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for other reference data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("otherreference.tsv.zip", "otherreference.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/patent.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent.Rmd deleted file mode 100644 index 0671d52..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/patent.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("patent.tsv.zip", "patent.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_contractawardnumber.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_contractawardnumber.Rmd deleted file mode 100644 index d3b2b1e..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/patent_contractawardnumber.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent_contractawardnumber data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("patent_contractawardnumber.tsv.zip", "patent_contractawardnumber.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_govintorg.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_govintorg.Rmd deleted file mode 100644 index a740e4e..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/patent_govintorg.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent_govintorg data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("patent_govintorg.tsv.zip", "patent_govintorg.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_inventor.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_inventor.Rmd deleted file mode 100644 index 8056a85..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/patent_inventor.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent_inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("patent_inventor.tsv.zip", "patent_inventor.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/patent_lawyer.Rmd b/03_bulk_download_read_in/R Scripts/archive/patent_lawyer.Rmd deleted file mode 100644 index d9d22fb..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/patent_lawyer.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent_lawyer data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("patent_lawyer.tsv.zip", "patent_lawyer.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/pct_data.Rmd b/03_bulk_download_read_in/R Scripts/archive/pct_data.Rmd deleted file mode 100644 index 4894b62..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/pct_data.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for pct_data data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pct_data.tsv.zip", "pct_data.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/persistent_assignee_disambig.Rmd b/03_bulk_download_read_in/R Scripts/archive/persistent_assignee_disambig.Rmd deleted file mode 100644 index 9fb98b8..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/persistent_assignee_disambig.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for persistent_assignee_disambig data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("persistent_assignee_disambig.tsv.zip", "persistent_assignee_disambig.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/persistent_inventor_disambig.Rmd b/03_bulk_download_read_in/R Scripts/archive/persistent_inventor_disambig.Rmd deleted file mode 100644 index 2ccace0..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/persistent_inventor_disambig.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for persistent_inventor_disambig data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("persistent_inventor_disambig.tsv.zip", "persistent_inventor_disambig.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/rawassignee.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawassignee.Rmd deleted file mode 100644 index 7947d52..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/rawassignee.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw assignee data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("rawassignee.tsv.zip", "rawassignee.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/rawexaminer.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawexaminer.Rmd deleted file mode 100644 index b980b03..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/rawexaminer.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw examiner data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("rawexaminer.tsv.zip", "rawexaminer.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/rawinventor.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawinventor.Rmd deleted file mode 100644 index 20c561a..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/rawinventor.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("rawinventor.tsv.zip", "rawinventor.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/rawlawyer.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawlawyer.Rmd deleted file mode 100644 index 0ccb7b2..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/rawlawyer.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw lawyer data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("rawlawyer.tsv.zip", "rawlawyer.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/rawlocation.Rmd b/03_bulk_download_read_in/R Scripts/archive/rawlocation.Rmd deleted file mode 100644 index a7f75a9..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/rawlocation.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw location data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("rawlocation.tsv.zip", "rawlocation.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/rel_app_text.Rmd b/03_bulk_download_read_in/R Scripts/archive/rel_app_text.Rmd deleted file mode 100644 index 344f376..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/rel_app_text.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for rel_app_text data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("rel_app_text.tsv.zip", "rel_app_text.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/subclass.Rmd b/03_bulk_download_read_in/R Scripts/archive/subclass.Rmd deleted file mode 100644 index 7bc2171..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/subclass.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for subclass data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("subclass.tsv.zip", "subclass.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/subclass_current.Rmd b/03_bulk_download_read_in/R Scripts/archive/subclass_current.Rmd deleted file mode 100644 index b963117..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/subclass_current.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for subclass_current data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("subclass_current.tsv.zip", "subclass_current.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/us_term_of_grant.Rmd b/03_bulk_download_read_in/R Scripts/archive/us_term_of_grant.Rmd deleted file mode 100644 index 2bd922b..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/us_term_of_grant.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for us_term_of_grant data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("us_term_of_grant.tsv.zip", "us_term_of_grant.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/usapplicationcitation.Rmd b/03_bulk_download_read_in/R Scripts/archive/usapplicationcitation.Rmd deleted file mode 100644 index f8d0946..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/usapplicationcitation.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for us application citation data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("usapplicationcitation.tsv.zip", "usapplicationcitation.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/uspatentcitation.Rmd b/03_bulk_download_read_in/R Scripts/archive/uspatentcitation.Rmd deleted file mode 100644 index e493cfa..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/uspatentcitation.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for us patent citation data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("uspatentcitation.tsv.zip", "uspatentcitation.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/uspc.Rmd b/03_bulk_download_read_in/R Scripts/archive/uspc.Rmd deleted file mode 100644 index 9865a3c..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/uspc.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for uspc data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("uspc.tsv.zip", "uspc.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/uspc_current.Rmd b/03_bulk_download_read_in/R Scripts/archive/uspc_current.Rmd deleted file mode 100644 index 64ab8c7..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/uspc_current.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for uspc_current data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("uspc_current.tsv.zip", "uspc_current.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/usreldoc.Rmd b/03_bulk_download_read_in/R Scripts/archive/usreldoc.Rmd deleted file mode 100644 index f2c6bfa..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/usreldoc.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for usreldoc data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("usreldoc.tsv.zip", "usreldoc.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/wipo.Rmd b/03_bulk_download_read_in/R Scripts/archive/wipo.Rmd deleted file mode 100644 index 0ea7797..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/wipo.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for wipo data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("wipo.tsv.zip", "wipo.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/archive/wipo_field.Rmd b/03_bulk_download_read_in/R Scripts/archive/wipo_field.Rmd deleted file mode 100644 index 527811f..0000000 --- a/03_bulk_download_read_in/R Scripts/archive/wipo_field.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for wipo_field data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("wipo_field.tsv.zip", "wipo_field.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_applicant_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_applicant_not_disambiguated.Rmd deleted file mode 100644 index a9c3b51..0000000 --- a/03_bulk_download_read_in/R Scripts/g_applicant_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for non_inventor_applicant data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_applicant_not_disambiguated.tsv.zip", "g_applicant_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_application.Rmd b/03_bulk_download_read_in/R Scripts/g_application.Rmd deleted file mode 100644 index 109ad25..0000000 --- a/03_bulk_download_read_in/R Scripts/g_application.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_application.tsv.zip", "g_application.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_assignee_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_assignee_disambiguated.Rmd deleted file mode 100644 index 7c4ce10..0000000 --- a/03_bulk_download_read_in/R Scripts/g_assignee_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for assignee data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_assignee_disambiguated.tsv.zip", "g_assignee_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_assignee_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_assignee_not_disambiguated.Rmd deleted file mode 100644 index 9002006..0000000 --- a/03_bulk_download_read_in/R Scripts/g_assignee_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw assignee data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_assignee_not_disambiguated.tsv.zip", "g_assignee_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_attorney_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_attorney_disambiguated.Rmd deleted file mode 100644 index ae9fc12..0000000 --- a/03_bulk_download_read_in/R Scripts/g_attorney_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for lawyer data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_attorney_disambiguated.tsv.zip", "g_attorney_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_attorney_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_attorney_not_disambiguated.Rmd deleted file mode 100644 index c8d74eb..0000000 --- a/03_bulk_download_read_in/R Scripts/g_attorney_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw lawyer data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_attorney_not_disambiguated.tsv.zip", "g_attorney_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_botanic.Rmd b/03_bulk_download_read_in/R Scripts/g_botanic.Rmd deleted file mode 100644 index e70b50e..0000000 --- a/03_bulk_download_read_in/R Scripts/g_botanic.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for botanic data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_botanic.tsv.zip", "g_botanic.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_cpc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/g_cpc_at_issue.Rmd deleted file mode 100644 index 7564e1f..0000000 --- a/03_bulk_download_read_in/R Scripts/g_cpc_at_issue.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_current data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_cpc_at_issue.tsv.zip", "g_cpc_at_issue.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_cpc_title.Rmd b/03_bulk_download_read_in/R Scripts/g_cpc_title.Rmd deleted file mode 100644 index 98e259b..0000000 --- a/03_bulk_download_read_in/R Scripts/g_cpc_title.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_group data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_cpc_title.tsv.zip", "g_cpc_title.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_examiner_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_examiner_not_disambiguated.Rmd deleted file mode 100644 index 26cc21e..0000000 --- a/03_bulk_download_read_in/R Scripts/g_examiner_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw examiner data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_examiner_not_disambiguated.tsv.zip", "g_examiner_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_figures.Rmd b/03_bulk_download_read_in/R Scripts/g_figures.Rmd deleted file mode 100644 index a128454..0000000 --- a/03_bulk_download_read_in/R Scripts/g_figures.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for figures data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_figures.tsv.zip", "g_figures.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_foreign_citation.Rmd b/03_bulk_download_read_in/R Scripts/g_foreign_citation.Rmd deleted file mode 100644 index 6a38b12..0000000 --- a/03_bulk_download_read_in/R Scripts/g_foreign_citation.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for foreign citation data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_foreign_citation.tsv.zip", "g_foreign_citation.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_foreign_priority.Rmd b/03_bulk_download_read_in/R Scripts/g_foreign_priority.Rmd deleted file mode 100644 index e5827c9..0000000 --- a/03_bulk_download_read_in/R Scripts/g_foreign_priority.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for foreign_priority data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_foreign_priority.tsv.zip", "g_foreign_priority.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_gov_interest.Rmd b/03_bulk_download_read_in/R Scripts/g_gov_interest.Rmd deleted file mode 100644 index f544175..0000000 --- a/03_bulk_download_read_in/R Scripts/g_gov_interest.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for government_interest data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_gov_interest.tsv.zip", "g_gov_interest.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_gov_interest_contracts.Rmd b/03_bulk_download_read_in/R Scripts/g_gov_interest_contracts.Rmd deleted file mode 100644 index 42fff94..0000000 --- a/03_bulk_download_read_in/R Scripts/g_gov_interest_contracts.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent_contractawardnumber data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_gov_interest_contracts.tsv.zip", "g_gov_interest_contracts.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_gov_interest_org.Rmd b/03_bulk_download_read_in/R Scripts/g_gov_interest_org.Rmd deleted file mode 100644 index 61828c6..0000000 --- a/03_bulk_download_read_in/R Scripts/g_gov_interest_org.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for government_interest data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_gov_interest_org.tsv.zip", "g_gov_interest_org.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_inventor_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_inventor_disambiguated.Rmd deleted file mode 100644 index 1995350..0000000 --- a/03_bulk_download_read_in/R Scripts/g_inventor_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_inventor_disambiguated.tsv.zip", "g_inventor_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_inventor_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_inventor_not_disambiguated.Rmd deleted file mode 100644 index 1850753..0000000 --- a/03_bulk_download_read_in/R Scripts/g_inventor_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_inventor_not_disambiguated.tsv.zip", "g_inventor_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_ipc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/g_ipc_at_issue.Rmd deleted file mode 100644 index 0e77ff8..0000000 --- a/03_bulk_download_read_in/R Scripts/g_ipc_at_issue.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for ipcr data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_ipc_at_issue.tsv.zip", "g_ipc_at_issue.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_location_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_location_disambiguated.Rmd deleted file mode 100644 index 1c6fce5..0000000 --- a/03_bulk_download_read_in/R Scripts/g_location_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for location data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_location_disambiguated.tsv.zip", "g_location_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_location_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/g_location_not_disambiguated.Rmd deleted file mode 100644 index a89e549..0000000 --- a/03_bulk_download_read_in/R Scripts/g_location_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for location data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_location_not_disambiguated.tsv.zip", "g_location_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_other_reference.Rmd b/03_bulk_download_read_in/R Scripts/g_other_reference.Rmd deleted file mode 100644 index f5189db..0000000 --- a/03_bulk_download_read_in/R Scripts/g_other_reference.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for other reference data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_other_reference.tsv.zip", "g_other_reference.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_patent.Rmd b/03_bulk_download_read_in/R Scripts/g_patent.Rmd deleted file mode 100644 index 166e511..0000000 --- a/03_bulk_download_read_in/R Scripts/g_patent.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_patent.tsv.zip", "g_patent.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_pct_data.Rmd b/03_bulk_download_read_in/R Scripts/g_pct_data.Rmd deleted file mode 100644 index 2dc323f..0000000 --- a/03_bulk_download_read_in/R Scripts/g_pct_data.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for pct_data data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_pct_data.tsv.zip", "g_pct_data.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_persistent_assignee.Rmd b/03_bulk_download_read_in/R Scripts/g_persistent_assignee.Rmd deleted file mode 100644 index df0908a..0000000 --- a/03_bulk_download_read_in/R Scripts/g_persistent_assignee.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for persistent_assignee_disambig data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_persistent_assignee.tsv.zip", "g_persistent_assignee.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_persistent_inventor.Rmd b/03_bulk_download_read_in/R Scripts/g_persistent_inventor.Rmd deleted file mode 100644 index 3bd5872..0000000 --- a/03_bulk_download_read_in/R Scripts/g_persistent_inventor.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for persistent_inventor_disambig data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_persistent_inventor.tsv.zip", "g_persistent_inventor.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_rel_app_text.Rmd b/03_bulk_download_read_in/R Scripts/g_rel_app_text.Rmd deleted file mode 100644 index 66b0d49..0000000 --- a/03_bulk_download_read_in/R Scripts/g_rel_app_text.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for rel_app_text data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_rel_app_text.tsv.zip", "g_rel_app_text.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_us_application_citation.Rmd b/03_bulk_download_read_in/R Scripts/g_us_application_citation.Rmd deleted file mode 100644 index 9417c66..0000000 --- a/03_bulk_download_read_in/R Scripts/g_us_application_citation.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for us application citation data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_us_application_citation.tsv.zip", "g_us_application_citation.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_us_patent_citation.Rmd b/03_bulk_download_read_in/R Scripts/g_us_patent_citation.Rmd deleted file mode 100644 index 6aa206c..0000000 --- a/03_bulk_download_read_in/R Scripts/g_us_patent_citation.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for us patent citation data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_us_patent_citation.tsv.zip", "g_us_patent_citation.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_us_rel_doc.Rmd b/03_bulk_download_read_in/R Scripts/g_us_rel_doc.Rmd deleted file mode 100644 index a6ed9c5..0000000 --- a/03_bulk_download_read_in/R Scripts/g_us_rel_doc.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for usreldoc data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_us_rel_doc.tsv.zip", "g_us_rel_doc.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_us_term_of_grant.Rmd b/03_bulk_download_read_in/R Scripts/g_us_term_of_grant.Rmd deleted file mode 100644 index 0c49a7c..0000000 --- a/03_bulk_download_read_in/R Scripts/g_us_term_of_grant.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for us_term_of_grant data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_us_term_of_grant.tsv.zip", "g_us_term_of_grant.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_uspc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/g_uspc_at_issue.Rmd deleted file mode 100644 index 20c93ef..0000000 --- a/03_bulk_download_read_in/R Scripts/g_uspc_at_issue.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for uspc data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_uspc_at_issue.tsv.zip", "g_uspc_at_issue.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/g_wipo_technology.Rmd b/03_bulk_download_read_in/R Scripts/g_wipo_technology.Rmd deleted file mode 100644 index 3cd56a5..0000000 --- a/03_bulk_download_read_in/R Scripts/g_wipo_technology.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for wipo data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("g_wipo_technology.tsv.zip", "g_wipo_technology.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_applicant_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_applicant_not_disambiguated.Rmd deleted file mode 100644 index a1c6c4b..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_applicant_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for subclass data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_applicant_not_disambiguated.tsv.zip", "pg_applicant_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_assignee_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_assignee_disambiguated.Rmd deleted file mode 100644 index f3f2776..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_assignee_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for assignee data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_assignee_disambiguated.tsv.zip", "pg_assignee_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_assignee_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_assignee_not_disambiguated.Rmd deleted file mode 100644 index 74cff63..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_assignee_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw assignee data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_assignee_not_disambiguated.tsv.zip", "pg_assignee_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_cpc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/pg_cpc_at_issue.Rmd deleted file mode 100644 index ade632b..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_cpc_at_issue.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_current data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_cpc_at_issue.tsv.zip", "pg_cpc_at_issue.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_cpc_current.Rmd b/03_bulk_download_read_in/R Scripts/pg_cpc_current.Rmd deleted file mode 100644 index cc8e732..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_cpc_current.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_current data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_cpc_current.tsv.zip", "pg_cpc_current.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_cpc_title.Rmd b/03_bulk_download_read_in/R Scripts/pg_cpc_title.Rmd deleted file mode 100644 index a075adc..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_cpc_title.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for cpc_group data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_cpc_title.tsv.zip", "pg_cpc_title.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_foreign_priority.Rmd b/03_bulk_download_read_in/R Scripts/pg_foreign_priority.Rmd deleted file mode 100644 index 793032b..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_foreign_priority.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for foreign_priority data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_foreign_priority.tsv.zip", "pg_foreign_priority.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_gov_interest.Rmd b/03_bulk_download_read_in/R Scripts/pg_gov_interest.Rmd deleted file mode 100644 index 76d86bc..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_gov_interest.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for government_interest data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_gov_interest.tsv.zip", "pg_gov_interest.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_gov_interest_contracts.Rmd b/03_bulk_download_read_in/R Scripts/pg_gov_interest_contracts.Rmd deleted file mode 100644 index ee67327..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_gov_interest_contracts.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent_contractawardnumber data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_gov_interest_contracts.tsv.zip", "pg_gov_interest_contracts.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_gov_interest_org.Rmd b/03_bulk_download_read_in/R Scripts/pg_gov_interest_org.Rmd deleted file mode 100644 index 0a6260c..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_gov_interest_org.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for government_interest data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_gov_interest_org.tsv.zip", "pg_gov_interest_org.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_granted_pgpubs_crosswalk.Rmd b/03_bulk_download_read_in/R Scripts/pg_granted_pgpubs_crosswalk.Rmd deleted file mode 100644 index 5e996bb..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_granted_pgpubs_crosswalk.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for patent data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_granted_pgpubs_crosswalk.tsv.zip", "pg_granted_pgpubs_crosswalk.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_inventor_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_inventor_disambiguated.Rmd deleted file mode 100644 index 22434e3..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_inventor_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_inventor_disambiguated.tsv.zip", "pg_inventor_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_inventor_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_inventor_not_disambiguated.Rmd deleted file mode 100644 index a88fe1e..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_inventor_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw inventor data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_inventor_not_disambiguated.tsv.zip", "pg_inventor_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_ipc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/pg_ipc_at_issue.Rmd deleted file mode 100644 index 9fabdd0..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_ipc_at_issue.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for ipcr data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_ipc_at_issue.tsv.zip", "pg_ipc_at_issue.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. 
-```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_location_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_location_disambiguated.Rmd deleted file mode 100644 index d9fc6fa..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_location_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for location data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_location_disambiguated.tsv.zip", "pg_location_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_location_not_disambiguated.Rmd b/03_bulk_download_read_in/R Scripts/pg_location_not_disambiguated.Rmd deleted file mode 100644 index 8b931b2..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_location_not_disambiguated.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for raw location data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_location_not_disambiguated.tsv.zip", "pg_location_not_disambiguated.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_pct_data.Rmd b/03_bulk_download_read_in/R Scripts/pg_pct_data.Rmd deleted file mode 100644 index 27e684d..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_pct_data.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for pct_data data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_pct_data.tsv.zip", "pg_pct_data.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_published_application.Rmd b/03_bulk_download_read_in/R Scripts/pg_published_application.Rmd deleted file mode 100644 index 8dfb717..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_published_application.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for us_term_of_grant data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_published_application.tsv.zip", "pg_published_application.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_rel_app_text.Rmd b/03_bulk_download_read_in/R Scripts/pg_rel_app_text.Rmd deleted file mode 100644 index 1f79271..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_rel_app_text.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for rel_app_text data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_rel_app_text.tsv.zip", "pg_rel_app_text.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_uspc_at_issue.Rmd b/03_bulk_download_read_in/R Scripts/pg_uspc_at_issue.Rmd deleted file mode 100644 index 11aebbd..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_uspc_at_issue.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for uspc data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. 
Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_uspc_at_issue.tsv.zip", "pg_uspc_at_issue.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/R Scripts/pg_wipo_technology.Rmd b/03_bulk_download_read_in/R Scripts/pg_wipo_technology.Rmd deleted file mode 100644 index 7a97372..0000000 --- a/03_bulk_download_read_in/R Scripts/pg_wipo_technology.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "Read-in script and summary information for wipo data" -output: html_document ---- - -```{r} -library(data.table) -library(vroom) -``` - -## Load patent file -Please include the folder path of the file. Ex: setwd("C:/Users/johnsmith/Downloads") -```{r} -setwd("") -data <- vroom::vroom(unz("pg_wipo_technology.tsv.zip", "pg_wipo_technology.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## Print information on the dataset: Number of observations, column variables, data type for each variable, and number of distinct values for non-numeric variables. -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/03_bulk_download_read_in/README.md b/03_bulk_download_read_in/README.md deleted file mode 100644 index 7eb7109..0000000 --- a/03_bulk_download_read_in/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# PatentsView-Code-Snippets - -# Bulk Download Files: Read-in Scripts - -Due to changes in the structure of the Bulk Download files, the PatentsView team has created template scripts in Python and R which demonstrate how to read in these tsv files. 
- -See the file format settings below: -| Table | File(s) | Data Contains Line Break | Field Separator | Quote Settings | Quote Character | -|------------------|-------------------------------|--------------------------|-----------------|---------------------------|-----------------| -| claims | Yearly files from 1976 - 2000 | Yes | \t | Non Numeric Fields Quoted | " | -| claims | 2001 data file | No | \t | Non Numeric Fields Quoted | " | -| claims | Yearly files from 2002 - 2020 | Yes | \t | Non Numeric Fields Quoted | " | -| brf_sum_text | Yearly files 1976 - 2020 | Yes | \t | Non Numeric Fields Quoted | " | -| detail_desc_text | Yearly files from 1976 - 2000 | Yes | \t | Non Numeric Fields Quoted | " | -| detail_desc_text | Yearly files from 2001 - 2004 | No | \t | Non Numeric Fields Quoted | unquoted | -| detail_desc_text | Yearly files from 2005 - 2000 | Yes | \t | Non Numeric Fields Quoted | " | -| draw_desc_text | Yearly files from 1976 - 2020 | Yes | \t | Non Numeric Fields Quoted | " | -| all other tables | Single bulk file | No | \t | Non Numeric Fields Quoted | " | diff --git a/04_bulk_pregrant_read_in/Python Scripts/application.py b/04_bulk_pregrant_read_in/Python Scripts/application.py deleted file mode 100644 index a989714..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/application.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for Number of figures and sheets -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "application.tsv.zip" -f_name = "application.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2005.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2005.py deleted file mode 100644 index 2122e34..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2005.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2005 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2005.tsv.zip" -f_name = "brf_sum_text_2005.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2006.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2006.py deleted file mode 100644 index 625df16..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2006.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2006 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2006.tsv.zip" -f_name = "brf_sum_text_2006.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2007.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2007.py deleted file mode 100644 index 2738591..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2007.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2007 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2007.tsv.zip" -f_name = "brf_sum_text_2007.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2008.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2008.py deleted file mode 100644 index 9ba7e9e..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2008.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2008 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2008.tsv.zip" -f_name = "brf_sum_text_2008.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2009.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2009.py deleted file mode 100644 index e3dbeb3..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2009.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2009 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2009.tsv.zip" -f_name = "brf_sum_text_2009.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2010.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2010.py deleted file mode 100644 index ee0865c..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2010.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2010 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2010.tsv.zip" -f_name = "brf_sum_text_2010.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2011.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2011.py deleted file mode 100644 index 3096f26..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2011.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2011 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2011.tsv.zip" -f_name = "brf_sum_text_2011.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2012.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2012.py deleted file mode 100644 index 25b1c2c..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2012.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2012 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2012.tsv.zip" -f_name = "brf_sum_text_2012.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2013.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2013.py deleted file mode 100644 index 922f81e..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2013.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2013 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2013.tsv.zip" -f_name = "brf_sum_text_2013.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2014.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2014.py deleted file mode 100644 index 0bd0dcf..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2014.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2014 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2014.tsv.zip" -f_name = "brf_sum_text_2014.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2015.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2015.py deleted file mode 100644 index b3baac2..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2015.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2015 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2015.tsv.zip" -f_name = "brf_sum_text_2015.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2016.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2016.py deleted file mode 100644 index 76124db..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2016.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2016 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2016.tsv.zip" -f_name = "brf_sum_text_2016.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2017.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2017.py deleted file mode 100644 index db986e4..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2017.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2017 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2017.tsv.zip" -f_name = "brf_sum_text_2017.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2018.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2018.py deleted file mode 100644 index 04c5288..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2018.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2018 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2018.tsv.zip" -f_name = "brf_sum_text_2018.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2019.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2019.py deleted file mode 100644 index c1c4528..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2019.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2019 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2019.tsv.zip" -f_name = "brf_sum_text_2019.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2020.py b/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2020.py deleted file mode 100644 index 884dfc3..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/brf_sum_text_2020.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2020 brief summary text -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "brf_sum_text_2020.tsv.zip" -f_name = "brf_sum_text_2020.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2005.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2005.py deleted file mode 100644 index 65c5bb2..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2005.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2005 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2005.tsv.zip" -f_name = "claim_2005.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2006.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2006.py deleted file mode 100644 index 50ca11b..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2006.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2006 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2006.tsv.zip" -f_name = "claim_2006.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2007.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2007.py deleted file mode 100644 index 9043d72..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2007.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2007 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2007.tsv.zip" -f_name = "claim_2007.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2008.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2008.py deleted file mode 100644 index 2db9a33..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2008.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2008 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2008.tsv.zip" -f_name = "claim_2008.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2009.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2009.py deleted file mode 100644 index fc5a34b..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2009.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2009 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2009.tsv.zip" -f_name = "claim_2009.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2010.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2010.py deleted file mode 100644 index c0a513b..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2010.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2010 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2010.tsv.zip" -f_name = "claim_2010.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2011.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2011.py deleted file mode 100644 index fa1adc2..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2011.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2011 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2011.tsv.zip" -f_name = "claim_2011.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2012.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2012.py deleted file mode 100644 index 573f708..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2012.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2012 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2012.tsv.zip" -f_name = "claim_2012.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2013.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2013.py deleted file mode 100644 index 93a693e..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2013.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2013 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2013.tsv.zip" -f_name = "claim_2013.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2014.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2014.py deleted file mode 100644 index ec148e4..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2014.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2014 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2014.tsv.zip" -f_name = "claim_2014.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2015.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2015.py deleted file mode 100644 index 53b90e2..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2015.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2015 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2015.tsv.zip" -f_name = "claim_2015.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2016.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2016.py deleted file mode 100644 index 17bff16..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2016.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2016 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2016.tsv.zip" -f_name = "claim_2016.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2017.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2017.py deleted file mode 100644 index 3d6db13..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2017.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2017 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2017.tsv.zip" -f_name = "claim_2017.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2018.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2018.py deleted file mode 100644 index dbbe2c5..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2018.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2018 Claims Data - -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2018.tsv.zip" -f_name = "claim_2018.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2019.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2019.py deleted file mode 100644 index 8864b6d..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2019.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2019 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2019.tsv.zip" -f_name = "claim_2019.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/claim_2020.py b/04_bulk_pregrant_read_in/Python Scripts/claim_2020.py deleted file mode 100644 index 12a4130..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/claim_2020.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for 2020 Claims Data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "claim_2020.tsv.zip" -f_name = "claim_2020.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/cpc.py b/04_bulk_pregrant_read_in/Python Scripts/cpc.py deleted file mode 100644 index 138c5f6..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/cpc.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for Number of figures and sheets -# Importing necessary packages. 
-import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "cpc.tsv.zip" -f_name = "cpc.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/cpc_current.py b/04_bulk_pregrant_read_in/Python Scripts/cpc_current.py deleted file mode 100644 index 9cc1ce7..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/cpc_current.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for Number of figures and sheets -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "cpc_current.tsv.zip" -f_name = "cpc_current.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2005.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2005.py deleted file mode 100644 index 8f6b1a0..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2005.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2005 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2005.tsv.zip" -f_name = "detail_desc_text_2005.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2006.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2006.py deleted file mode 100644 index 6764318..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2006.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2006 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2006.tsv.zip" -f_name = "detail_desc_text_2006.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2007.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2007.py deleted file mode 100644 index 376f055..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2007.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2007 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2007.tsv.zip" -f_name = "detail_desc_text_2007.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2008.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2008.py deleted file mode 100644 index a8096ab..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2008.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2008 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2008.tsv.zip" -f_name = "detail_desc_text_2008.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2009.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2009.py deleted file mode 100644 index ac391af..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2009.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2009 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2009.tsv.zip" -f_name = "detail_desc_text_2009.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2010.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2010.py deleted file mode 100644 index 5b6bd0f..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2010.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2010 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2010.tsv.zip" -f_name = "detail_desc_text_2010.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2011.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2011.py deleted file mode 100644 index 5fc7d0d..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2011.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2011 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2011.tsv.zip" -f_name = "detail_desc_text_2011.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2012.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2012.py deleted file mode 100644 index 6ec0e29..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2012.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2012 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2012.tsv.zip" -f_name = "detail_desc_text_2012.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2013.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2013.py deleted file mode 100644 index 967ff38..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2013.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2013 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2013.tsv.zip" -f_name = "detail_desc_text_2013.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2014.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2014.py deleted file mode 100644 index e43aa33..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2014.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2014 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2014.tsv.zip" -f_name = "detail_desc_text_2014.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2015.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2015.py deleted file mode 100644 index 8857199..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2015.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2015 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2015.tsv.zip" -f_name = "detail_desc_text_2015.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2016.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2016.py deleted file mode 100644 index c536518..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2016.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2016 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2016.tsv.zip" -f_name = "detail_desc_text_2016.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2017.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2017.py deleted file mode 100644 index 9a4d649..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2017.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2017 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2017.tsv.zip" -f_name = "detail_desc_text_2017.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2018.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2018.py deleted file mode 100644 index 1736c47..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2018.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2018 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2018.tsv.zip" -f_name = "detail_desc_text_2018.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2019.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2019.py deleted file mode 100644 index a9a56b6..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2019.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2019 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2019.tsv.zip" -f_name = "detail_desc_text_2019.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2020.py b/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2020.py deleted file mode 100644 index fc7047a..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/detail_desc_text_2020.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2020 detail_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "detail_desc_text_2020.tsv.zip" -f_name = "detail_desc_text_2020.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2007.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2007.py deleted file mode 100644 index df58059..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2007.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2007 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2007.tsv.zip" -f_name = "draw_desc_text_2007.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2008.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2008.py deleted file mode 100644 index c355da0..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2008.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2008 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2008.tsv.zip" -f_name = "draw_desc_text_2008.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2009.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2009.py deleted file mode 100644 index 94b1d72..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2009.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2009 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2009.tsv.zip" -f_name = "draw_desc_text_2009.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2010.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2010.py deleted file mode 100644 index e26c796..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2010.py +++ /dev/null @@ -1,28 +0,0 @@ -#Read-in script for 2010 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2010.tsv.zip" -f_name = "draw_desc_text_2010.tsv" -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2011.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2011.py deleted file mode 100644 index 8230ede..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2011.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2011 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2011.tsv.zip" -f_name = "draw_desc_text_2011.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2012.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2012.py deleted file mode 100644 index 8753c91..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2012.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2012 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2012.tsv.zip" -f_name = "draw_desc_text_2012.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2013.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2013.py deleted file mode 100644 index 4b98a6d..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2013.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2013 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2013.tsv.zip" -f_name = "draw_desc_text_2013.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2014.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2014.py deleted file mode 100644 index 7c25464..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2014.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2014 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2014.tsv.zip" -f_name = "draw_desc_text_2014.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2015.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2015.py deleted file mode 100644 index 08e96fa..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2015.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2015 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2015.tsv.zip" -f_name = "draw_desc_text_2015.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2016.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2016.py deleted file mode 100644 index aa571ee..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2016.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2016 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2016.tsv.zip" -f_name = "draw_desc_text_2016.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2017.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2017.py deleted file mode 100644 index d1555b7..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2017.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2017 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2017.tsv.zip" -f_name = "draw_desc_text_2017.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2018.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2018.py deleted file mode 100644 index 5e64113..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2018.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2018 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2018.tsv.zip" -f_name = "draw_desc_text_2018.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2019.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2019.py deleted file mode 100644 index 03fb5ed..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2019.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2019 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2019.tsv.zip" -f_name = "draw_desc_text_2019.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2020.py b/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2020.py deleted file mode 100644 index 79bada2..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/draw_desc_text_2020.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for 2020 draw_desc_text Data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "draw_desc_text_2020.tsv.zip" -f_name = "draw_desc_text_2020.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/foreign_priority.py b/04_bulk_pregrant_read_in/Python Scripts/foreign_priority.py deleted file mode 100644 index d1bf3b2..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/foreign_priority.py +++ /dev/null @@ -1,30 +0,0 @@ -#Read-in script for Foreign priority data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "foreign_priority.tsv.zip" -f_name = "foreign_priority.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/granted_patent_crosswalk.py b/04_bulk_pregrant_read_in/Python Scripts/granted_patent_crosswalk.py deleted file mode 100644 index b5ea263..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/granted_patent_crosswalk.py +++ /dev/null @@ -1,30 +0,0 @@ -#Read-in script for granted patent crosswalk data -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "granted_patent_crosswalk.tsv.zip" -f_name = "_temp_patent_crosswalk.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/ipcr.py b/04_bulk_pregrant_read_in/Python Scripts/ipcr.py deleted file mode 100644 index e837cf1..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/ipcr.py +++ /dev/null @@ -1,30 +0,0 @@ -#Read-in script for International Patent Classification data for all patents (as of publication date) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "ipcr.tsv.zip" -f_name = "ipcr.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/pct_data.py b/04_bulk_pregrant_read_in/Python Scripts/pct_data.py deleted file mode 100644 index e01ebf6..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/pct_data.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for PCT data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "pct_data.tsv.zip" -f_name = "pct_data.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/publication.py b/04_bulk_pregrant_read_in/Python Scripts/publication.py deleted file mode 100644 index 45cafb7..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/publication.py +++ /dev/null @@ -1,31 +0,0 @@ -#Read-in script for Publication data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np -pd.set_option('display.max_columns', None) -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -file_name = "publication.tsv.zip" -f_name = "publication.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/rawassignee.py b/04_bulk_pregrant_read_in/Python Scripts/rawassignee.py deleted file mode 100644 index 105cdf3..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/rawassignee.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for Raw inventor information as it appears in the source text and XML files -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "rawinventor.tsv.zip" -f_name = "rawinventor.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/rawinventor.py b/04_bulk_pregrant_read_in/Python Scripts/rawinventor.py deleted file mode 100644 index d7ffd5e..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/rawinventor.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for Raw assignee information as it appears in the source text and XML files -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# Selecting the zip file. -file_name = "rawassignee.tsv.zip" -f_name = "rawassignee.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/rel_app_text.py b/04_bulk_pregrant_read_in/Python Scripts/rel_app_text.py deleted file mode 100644 index bb205e7..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/rel_app_text.py +++ /dev/null @@ -1,32 +0,0 @@ -#Read-in script for rel_app_text data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "rel_app_text.tsv.zip" -f_name = "rel_app_text.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/us_parties.py b/04_bulk_pregrant_read_in/Python Scripts/us_parties.py deleted file mode 100644 index 836754d..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/us_parties.py +++ /dev/null @@ -1,32 +0,0 @@ -#Read-in script for us_parties data - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -import numpy as np - -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -file_name = "us_parties.tsv.zip" -f_name = "us_parties.tsv" -# Selecting the zip file. -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/us_rel_doc.py b/04_bulk_pregrant_read_in/Python Scripts/us_rel_doc.py deleted file mode 100644 index 7af943a..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/us_rel_doc.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for U.S. 
related documents (post-2005 patents only) - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "usreldoc.tsv.zip" -f_name = "usreldoc.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. -chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/Python Scripts/uspc.py b/04_bulk_pregrant_read_in/Python Scripts/uspc.py deleted file mode 100644 index 935d211..0000000 --- a/04_bulk_pregrant_read_in/Python Scripts/uspc.py +++ /dev/null @@ -1,29 +0,0 @@ -#Read-in script for USPC classification data for all patents - -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv -# Set up file path: -# Please include the folder path of the file you are reading. Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") -# Selecting the zip file. -file_name = "uspc.tsv.zip" -f_name = "uspc.tsv" -zf = zip.ZipFile(file_name) -# Reading the selected file in the zip. 
-chunksize = 10 ** 4 -count = 1 -n_obs = 0 -final = [] -for df in pd.read_csv(zf.open(f_name), delimiter="\t", chunksize=chunksize, quoting=csv.QUOTE_NONNUMERIC): - print('processing chunk: ' + str(count)) - n_obs += len(df) - count += 1 - final.append(df) -# Create data frame with all observations -df = pd.concat(final) -# Print summary of data: number of observations, columns, and each variable data type -print(n_obs) -print(df.dtypes) \ No newline at end of file diff --git a/04_bulk_pregrant_read_in/R Scripts/application.Rmd b/04_bulk_pregrant_read_in/R Scripts/application.Rmd deleted file mode 100644 index 5124fa7..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/application.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "application" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -setwd("C:\\Users\\jtutor\\Downloads") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("application.tsv.zip", "application.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text.Rmd deleted file mode 100644 index 43818c2..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. 
Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text.tsv.zip", "brf_sum_text.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2005.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2005.Rmd deleted file mode 100644 index 3f3aa9d..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2005.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2005" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2005.tsv.zip", "brf_sum_text_2005.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2006.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2006.Rmd deleted file mode 100644 index 6314172..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2006.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2006" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2006.tsv.zip", "brf_sum_text_2006.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2007.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2007.Rmd deleted file mode 100644 index 944e926..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2007.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2007" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2007.tsv.zip", "brf_sum_text_2007.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2008.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2008.Rmd deleted file mode 100644 index f3c6510..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2008.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2008" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2008.tsv.zip", "brf_sum_text_2008.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2009.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2009.Rmd deleted file mode 100644 index b321fb3..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2009.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2009" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2009.tsv.zip", "brf_sum_text_2009.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2010.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2010.Rmd deleted file mode 100644 index abaace3..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2010.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2010" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2010.tsv.zip", "brf_sum_text_2010.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2011.Rmd deleted file mode 100644 index 597e2c3..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2011.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2011" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2011.tsv.zip", "brf_sum_text_2011.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2012.Rmd deleted file mode 100644 index c3017b2..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2012.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2012" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2012.tsv.zip", "brf_sum_text_2012.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2013.Rmd deleted file mode 100644 index 4547c7f..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2013.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2013" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2013.tsv.zip", "brf_sum_text_2013.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2014.Rmd deleted file mode 100644 index 56de16c..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2014.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2014" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2014.tsv.zip", "brf_sum_text_2014.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2015.Rmd deleted file mode 100644 index 014b211..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2015.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2015" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2015.tsv.zip", "brf_sum_text_2015.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2016.Rmd deleted file mode 100644 index 01eab52..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2016.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2016" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2016.tsv.zip", "brf_sum_text_2016.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2017.Rmd deleted file mode 100644 index d817d86..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2017.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2017" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2017.tsv.zip", "brf_sum_text_2017.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2018.Rmd deleted file mode 100644 index 0540662..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2018.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2018" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2018.tsv.zip", "brf_sum_text_2018.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2019.Rmd deleted file mode 100644 index e87d77f..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2019.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2019" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2019.tsv.zip", "brf_sum_text_2019.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2020.Rmd deleted file mode 100644 index b1eb842..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/brf_sum_text_2020.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "brf_sum_text_2020" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("brf_sum_text_2020.tsv.zip", "brf_sum_text_2020.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2005.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2005.Rmd deleted file mode 100644 index b2d6a2e..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2005.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2005" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2005.tsv.zip", "claim_2005.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2006.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2006.Rmd deleted file mode 100644 index cb2acd4..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2006.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2006" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2006.tsv.zip", "claim_2006.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2007.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2007.Rmd deleted file mode 100644 index ed0c355..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2007.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2007" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2007.tsv.zip", "claim_2007.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2008.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2008.Rmd deleted file mode 100644 index 8f466c4..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2008.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2008" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2008.tsv.zip", "claim_2008.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2009.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2009.Rmd deleted file mode 100644 index ad7da30..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2009.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2009" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2009.tsv.zip", "claim_2009.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2010.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2010.Rmd deleted file mode 100644 index 9fec56c..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2010.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2010" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2010.tsv.zip", "claim_2010.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2011.Rmd deleted file mode 100644 index 9058700..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2011.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2011" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2011.tsv.zip", "claim_2011.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2012.Rmd deleted file mode 100644 index 3998537..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2012.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2012" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2012.tsv.zip", "claim_2012.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2013.Rmd deleted file mode 100644 index d4753ca..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2013.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2013" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2013.tsv.zip", "claim_2013.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2014.Rmd deleted file mode 100644 index 710b25e..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2014.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2014" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2014.tsv.zip", "claim_2014.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2015.Rmd deleted file mode 100644 index a8af866..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2015.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2015" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2015.tsv.zip", "claim_2015.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2016.Rmd deleted file mode 100644 index 4ebb221..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2016.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2016" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2016.tsv.zip", "claim_2016.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2017.Rmd deleted file mode 100644 index 6266834..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2017.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2017" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2017.tsv.zip", "claim_2017.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2018.Rmd deleted file mode 100644 index d9a5243..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2018.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2018" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2018.tsv.zip", "claim_2018.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2019.Rmd deleted file mode 100644 index 97a1cb1..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2019.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2019" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2019.tsv.zip", "claim_2019.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/claim_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/claim_2020.Rmd deleted file mode 100644 index 18112d1..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/claim_2020.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "claim_2020" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("claim_2020.tsv.zip", "claim_2020.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/cpc.Rmd b/04_bulk_pregrant_read_in/R Scripts/cpc.Rmd deleted file mode 100644 index 20aeffe..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/cpc.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "cpc" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("cpc.tsv.zip", "cpc.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/cpc_current.Rmd b/04_bulk_pregrant_read_in/R Scripts/cpc_current.Rmd deleted file mode 100644 index b38497d..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/cpc_current.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "cpc_current" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("cpc_current.tsv.zip", "cpc_current.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2011.Rmd deleted file mode 100644 index 1bf4f97..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2011.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2011" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2011.tsv.zip", "detail_desc_text_2011.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2012.Rmd deleted file mode 100644 index 2ca0213..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2012.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2012" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2012.tsv.zip", "detail_desc_text_2012.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2013.Rmd deleted file mode 100644 index 9a9525d..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2013.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2013" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2013.tsv.zip", "detail_desc_text_2013.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2014.Rmd deleted file mode 100644 index 4d36b7c..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2014.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2014" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2014.tsv.zip", "detail_desc_text_2014.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2015.Rmd deleted file mode 100644 index c54b9b4..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2015.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2015" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2015.tsv.zip", "detail_desc_text_2015.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2016.Rmd deleted file mode 100644 index 26d79a9..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2016.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2016" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2016.tsv.zip", "detail_desc_text_2016.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2017.Rmd deleted file mode 100644 index 83533b4..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2017.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2017" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2017.tsv.zip", "detail_desc_text_2017.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2018.Rmd deleted file mode 100644 index 9629451..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2018.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2018" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2018.tsv.zip", "detail_desc_text_2018.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2019.Rmd deleted file mode 100644 index 77b7985..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2019.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2019" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2019.tsv.zip", "detail_desc_text_2019.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2020.Rmd deleted file mode 100644 index 50713f5..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/detail_desc_text_2020.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "detail_desc_text_2020" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("detail_desc_text_2020.tsv.zip", "detail_desc_text_2020.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2007.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2007.Rmd deleted file mode 100644 index 73e2753..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2007.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2007" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2007.tsv.zip", "draw_desc_text_2007.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2008.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2008.Rmd deleted file mode 100644 index 2d289da..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2008.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2008" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2008.tsv.zip", "draw_desc_text_2008.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2009.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2009.Rmd deleted file mode 100644 index 642ae40..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2009.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2009" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2009.tsv.zip", "draw_desc_text_2009.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2010.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2010.Rmd deleted file mode 100644 index 530ed21..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2010.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2010" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2010.tsv.zip", "draw_desc_text_2010.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2011.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2011.Rmd deleted file mode 100644 index cf847fb..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2011.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2011" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2011.tsv.zip", "draw_desc_text_2011.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2012.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2012.Rmd deleted file mode 100644 index 87b9058..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2012.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2012" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2012.tsv.zip", "draw_desc_text_2012.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2013.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2013.Rmd deleted file mode 100644 index 452e686..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2013.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2013" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2013.tsv.zip", "draw_desc_text_2013.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2014.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2014.Rmd deleted file mode 100644 index 4a86c94..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2014.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2014" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2014.tsv.zip", "draw_desc_text_2014.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2015.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2015.Rmd deleted file mode 100644 index 1e8e987..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2015.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2015" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2015.tsv.zip", "draw_desc_text_2015.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2016.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2016.Rmd deleted file mode 100644 index 3de0940..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2016.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2016" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2016.tsv.zip", "draw_desc_text_2016.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2017.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2017.Rmd deleted file mode 100644 index 983cef6..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2017.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2017" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2017.tsv.zip", "draw_desc_text_2017.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2018.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2018.Rmd deleted file mode 100644 index bd9ea9a..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2018.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2018" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2018.tsv.zip", "draw_desc_text_2018.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2019.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2019.Rmd deleted file mode 100644 index 5267a9a..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2019.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2019" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2019.tsv.zip", "draw_desc_text_2019.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2020.Rmd b/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2020.Rmd deleted file mode 100644 index 04db8c4..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/draw_desc_text_2020.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "draw_desc_text_2020" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("draw_desc_text_2020.tsv.zip", "draw_desc_text_2020.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/foreign_priority.Rmd b/04_bulk_pregrant_read_in/R Scripts/foreign_priority.Rmd deleted file mode 100644 index 803b253..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/foreign_priority.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "foreign_priority" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("foreign_priority.tsv.zip", "foreign_priority.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/granted_patent_crosswalk.Rmd b/04_bulk_pregrant_read_in/R Scripts/granted_patent_crosswalk.Rmd deleted file mode 100644 index 091b9fb..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/granted_patent_crosswalk.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "granted_patent_crosswalk" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("granted_patent_crosswalk.tsv.zip", "_temp_patent_crosswalk.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/ipcr.Rmd b/04_bulk_pregrant_read_in/R Scripts/ipcr.Rmd deleted file mode 100644 index 57990c9..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/ipcr.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "ipcr" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("ipcr.tsv.zip", "ipcr.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/pct_data.Rmd b/04_bulk_pregrant_read_in/R Scripts/pct_data.Rmd deleted file mode 100644 index e95f4fc..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/pct_data.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "pct_data" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("pct_data.tsv.zip", "pct_data.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/publication.Rmd b/04_bulk_pregrant_read_in/R Scripts/publication.Rmd deleted file mode 100644 index edddf57..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/publication.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "publication" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("publication.tsv.zip", "publication.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/rawassignee.Rmd b/04_bulk_pregrant_read_in/R Scripts/rawassignee.Rmd deleted file mode 100644 index 5969bad..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/rawassignee.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "rawassignee" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("rawassignee.tsv.zip", "rawassignee.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/rawinventor.Rmd b/04_bulk_pregrant_read_in/R Scripts/rawinventor.Rmd deleted file mode 100644 index bf66b04..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/rawinventor.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "rawinventor" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("rawinventor.tsv.zip", "rawinventor.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/rel_app_text.Rmd b/04_bulk_pregrant_read_in/R Scripts/rel_app_text.Rmd deleted file mode 100644 index e399cb6..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/rel_app_text.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "rel_app_text" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("rel_app_text.tsv.zip", "rel_app_text.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/us_parties.Rmd b/04_bulk_pregrant_read_in/R Scripts/us_parties.Rmd deleted file mode 100644 index 2d4c8a7..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/us_parties.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "us_parties" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("us_parties.tsv.zip", "us_parties.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/uspc.Rmd b/04_bulk_pregrant_read_in/R Scripts/uspc.Rmd deleted file mode 100644 index 07861cd..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/uspc.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "uspc" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("uspc.tsv.zip", "uspc.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. 
Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/04_bulk_pregrant_read_in/R Scripts/usreldoc.Rmd b/04_bulk_pregrant_read_in/R Scripts/usreldoc.Rmd deleted file mode 100644 index 91cb5f3..0000000 --- a/04_bulk_pregrant_read_in/R Scripts/usreldoc.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: "usreldoc" -author: "Evelyn" -date: "9/25/2020" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -library(vroom) - -# set directory -knitr::opts_knit$set(root.dir = "") -``` - -## 2. Load patent file -```{r} -data <- vroom::vroom(unz("usreldoc.tsv.zip", "usreldoc.tsv"), - delim = "\t", col_names = TRUE, na = c("", " ", "na", "NA", "N/A")) -``` - -## 3. Get descriptive information -```{r} -head(data) -ncol(data) -nrow(data) -str(data) -summary(data, na.rm=TRUE) -``` diff --git a/05_bulk_pregrant_joins/Python Scripts/join_application_publication.py b/05_bulk_pregrant_joins/Python Scripts/join_application_publication.py deleted file mode 100644 index 20d1f5c..0000000 --- a/05_bulk_pregrant_joins/Python Scripts/join_application_publication.py +++ /dev/null @@ -1,37 +0,0 @@ -# Read-in script for joining the pre-granted application and publication tables -# Importing necessary packages. -import os -import zipfile as zip -import pandas as pd -import csv - -# Set up file path: -# Please include the folder path of the file you are reading. 
Ex: os.chdir("C:/Users/johnsmith/Downloads") -os.chdir("") - -# specify the name of the application zip file and the name you want to use when unzipped -app_zip = "application.tsv.zip" -app = "application.tsv" - -# specify the name of the publication zip file and the name you want to use when unzipped -pub_zip = "publication.tsv.zip" -pub = "publication.tsv" - -# Selecting the zip files -zf_app = zip.ZipFile(app_zip) -zf_pub = zip.ZipFile(pub_zip) - -# Read the data into dataframes -df_app = pd.read_csv(zf_app.open(app), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) -df_pub = pd.read_csv(zf_pub.open(pub), delimiter="\t", quoting=csv.QUOTE_NONNUMERIC) - -# Rename columns which are the same across both files -df_app = df_app.rename(columns={'id':'id_app', 'date':'date_app', 'country':'country_app'}) -df_pub = df_pub.rename(columns={'id':'id_pub', 'date':'date_pub', 'country':'country_pub'}) - -# Merge the two dataframes together -merged = df_pub.merge(df_app, how="inner", on='document_number') - -# print the first 5 columns and the length of the dataframe -print(merged.head()) -print(len(merged)) \ No newline at end of file diff --git a/05_bulk_pregrant_joins/R Scripts/join_application_publication.Rmd b/05_bulk_pregrant_joins/R Scripts/join_application_publication.Rmd deleted file mode 100644 index 9e1fbda..0000000 --- a/05_bulk_pregrant_joins/R Scripts/join_application_publication.Rmd +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: "join application and publication" -author: "Chris" -date: "4/12/2021" -output: html_document ---- - -## 1. Set up -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) - -# load packages -library(data.table) -``` - -## 2. 
Load application and publication files -## The application file is too large to unzip in R so we recommend unzipping the files manually then running the code -```{r} -# set directory ex: setwd("/Users/username/Downloads") -setwd("") - -# The application file may be too large to read into memory on some computers -# If this is the case you can split the file into smaller pieces like so: -# app <- fread("application.tsv", sep="\t", nrows = 100000, skip = 0) -# where nrows is the number of rows to read from the file and skip is the number -# of rows to skip from the start of the file - -app <- fread("application.tsv", sep="\t") -pub <- fread("publication.tsv", sep="\t") -``` -## 3. Rename columns that are the same across the two files -```{r} -colnames(app)[colnames(app) == 'id'] <- 'id_app' -colnames(app)[colnames(app) == 'date'] <- 'date_app' -colnames(app)[colnames(app) == 'country'] <- 'country_app' - -colnames(pub)[colnames(pub) == 'id'] <- 'id_pub' -colnames(pub)[colnames(pub) == 'date'] <- 'date_pub' -colnames(pub)[colnames(pub) == 'country'] <- 'country_pub' -``` - - -## 3. Merge application and publication data using the document_number column -```{r} -merged <- merge(pub, app, by = "document_number") -``` diff --git a/06_mysql_text_load_in/README.md b/06_mysql_text_load_in/README.md deleted file mode 100644 index 07867c3..0000000 --- a/06_mysql_text_load_in/README.md +++ /dev/null @@ -1,4 +0,0 @@ -## Mysql Text Table Load-In Scripts - -the shell scripts in this folder are designed to load downloaded text table files into a user's own mysql database using a connection specified in the sql.conf file (template provided). -the user must substitute in their own file path for the downloaded tsv files, and may need to change the year suffix for the file. 
diff --git a/06_mysql_text_load_in/g_brf_sum_text.sh b/06_mysql_text_load_in/g_brf_sum_text.sh deleted file mode 100644 index 7434c20..0000000 --- a/06_mysql_text_load_in/g_brf_sum_text.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_brf_sum_text_2022.tsv' INTO TABLE patent_text.brf_sum_text_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git a/06_mysql_text_load_in/g_claims.sh b/06_mysql_text_load_in/g_claims.sh deleted file mode 100644 index 3855754..0000000 --- a/06_mysql_text_load_in/g_claims.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_claims_2022.tsv' INTO TABLE patent_text.claims_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git a/06_mysql_text_load_in/g_detail_desc_text.sh b/06_mysql_text_load_in/g_detail_desc_text.sh deleted file mode 100644 index f94fea8..0000000 --- a/06_mysql_text_load_in/g_detail_desc_text.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_detail_desc_text_2022.tsv' INTO TABLE patent_text.detail_desc_text_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git a/06_mysql_text_load_in/g_draw_desc_text.sh b/06_mysql_text_load_in/g_draw_desc_text.sh deleted file mode 100644 index 874ed3e..0000000 --- a/06_mysql_text_load_in/g_draw_desc_text.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/g_draw_desc_text_2022.tsv' INTO TABLE patent_text.draw_desc_text_2022_test character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git 
a/06_mysql_text_load_in/pg_brf_sum_text.sh b/06_mysql_text_load_in/pg_brf_sum_text.sh deleted file mode 100644 index 87a2bbe..0000000 --- a/06_mysql_text_load_in/pg_brf_sum_text.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_brf_sum_text_2022.tsv' INTO TABLE pgpubs_text.brf_sum_text_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git a/06_mysql_text_load_in/pg_claims.sh b/06_mysql_text_load_in/pg_claims.sh deleted file mode 100644 index 1dd118f..0000000 --- a/06_mysql_text_load_in/pg_claims.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_claims_2022.tsv' INTO TABLE pgpubs_text.claims_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git a/06_mysql_text_load_in/pg_detail_desc_text.sh b/06_mysql_text_load_in/pg_detail_desc_text.sh deleted file mode 100644 index b223430..0000000 --- a/06_mysql_text_load_in/pg_detail_desc_text.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_detail_desc_text_2022.tsv' INTO TABLE pgpubs_text.detail_desc_text_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git a/06_mysql_text_load_in/pg_draw_desc_text.sh b/06_mysql_text_load_in/pg_draw_desc_text.sh deleted file mode 100644 index 032d097..0000000 --- a/06_mysql_text_load_in/pg_draw_desc_text.sh +++ /dev/null @@ -1 +0,0 @@ -mysql --defaults-file=resources/sql.conf --local-infile=1 -e "LOAD DATA LOCAL INFILE '/path/to/file/pg_draw_desc_text_2022.tsv' INTO TABLE pgpubs_text.draw_desc_text_2022 character set utf8mb4 FIELDS TERMINATED BY '\t' ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES;" diff --git 
a/06_mysql_text_load_in/sql.conf b/06_mysql_text_load_in/sql.conf deleted file mode 100644 index 2575d75..0000000 --- a/06_mysql_text_load_in/sql.conf +++ /dev/null @@ -1,5 +0,0 @@ -[client] -host = -user = -password = -port = 3306 diff --git a/07_PatentSearch_API_demo/README.md b/07_PatentSearch_API_demo/README.md deleted file mode 100644 index 47cb16a..0000000 --- a/07_PatentSearch_API_demo/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# PatentSearch API demo - -The jupyter notebook in this folder demonstrates how to request and read data from the PatentsView PatentSearch API in Python. - -To use this notebok, users must have installed Python and the following Python packages and their dependnencies: -* Jupyter -* Pandas -* Requests -* JSON \ No newline at end of file diff --git a/07_Search_API_demo/PV Search API tutorial.ipynb b/07_Search_API_demo/PV Search API tutorial.ipynb deleted file mode 100644 index e13d45e..0000000 --- a/07_Search_API_demo/PV Search API tutorial.ipynb +++ /dev/null @@ -1,23 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# This file has moved\n", - "## New Name, New Location\n", - "PatentsView's Search API has become the PatentSearch API, and so the tutorial notebook has been renamed to match. \n", - "You can find that resource at https://github.com/PatentsView/PatentsView-Code-Snippets/blob/master/07_PatentSearch_API_demo/PV%20PatentSearch%20API%20tutorial.ipynb \n", - "\n", - "Thank you for using PatentsView!" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/07_Search_API_demo/README.md b/07_Search_API_demo/README.md deleted file mode 100644 index 1207d01..0000000 --- a/07_Search_API_demo/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# This file has moved -## New Name, New Location -PatentsView's Search API has become the PatentSearch API, and so the tutorial notebook has been renamed to match. 
-You can find that resource at https://github.com/PatentsView/PatentsView-Code-Snippets/blob/master/07_PatentSearch_API_demo/ - -Thank you for using PatentsView! \ No newline at end of file diff --git a/07_PatentSearch_API_demo/PV PatentSearch API tutorial.ipynb b/PatentSearch/0-patentsearch-api-demo.ipynb similarity index 100% rename from 07_PatentSearch_API_demo/PV PatentSearch API tutorial.ipynb rename to PatentSearch/0-patentsearch-api-demo.ipynb diff --git a/07_PatentSearch_API_demo/notebook_images/detail_ex.png b/PatentSearch/notebook_images/detail_ex.png similarity index 100% rename from 07_PatentSearch_API_demo/notebook_images/detail_ex.png rename to PatentSearch/notebook_images/detail_ex.png diff --git a/07_PatentSearch_API_demo/notebook_images/schema_ex.png b/PatentSearch/notebook_images/schema_ex.png similarity index 100% rename from 07_PatentSearch_API_demo/notebook_images/schema_ex.png rename to PatentSearch/notebook_images/schema_ex.png diff --git a/README.md b/README.md index 91011e9..d07fc4b 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,21 @@ -# PatentsView-Code-Snippets +[![Pytest](https://github.com/PatentsView/PatentsView-Code-Snippets/actions/workflows/pytest.yml/badge.svg)](https://github.com/PatentsView/PatentsView-Code-Snippets/actions/workflows/pytest.yml) -The code scripts in this repository are for general PatentsView users and serve a variety of purposes. +# PatentsView Code Examples -List of Resources: +Examples working with [PatentsView's bulk data downloads](https://patentsview.org/download/data-download-tables) and [PatentsView's PatentSearch API](https://search.patentsview.org/docs/2024/11/06/2.2-release). 
-| Folder | Description | -| --- |--- | -| 01_bulk_download_example_joins| *Provides code examples for joining bulk download files*| -| 02_claims_examples| *Provides an example Jupyter Notebook demonstrating how to read claims data files in Python* | -| 03_bulk_download_read_in| *Provides code examples for reading in bulk download files using R and Python*| -| 04_bulk_pregrant_read_in| *Provides code examples for reading in bulk pregrant data download files using R and Python*| -| 05_bulk_pregrant_joins| *Provides code examples for merging two tables with a shared key using R and Python*| -| 06_mysql_text_load_in| *Provides code examples for reading bulk text data files into a user's own mysql database*| -| 07_PatentSearch_API_demo| *Provides an example Jupyter Notebook demonstrating how to request and read data from the PatentsView PatentSearch API in Python*| +## Examples List + +### Bulk Data Downloads +- [Getting Started With PatentsView Data Downloads](data-downloads/0-getting-started.ipynb) + +### PatentSearch API +- [PatentSearch API Demo](PatentSearch/0-patentsearch-api-demo.ipynb) + +## Dependencies + +Python and R dependencies are specified in the [`environment.yml`](environment.yml) file and in individual code example files. You can install all dependencies using [**conda**](https://docs.conda.io/projects/conda/en/latest/index.html): +```sh +conda env update +conda activate pv-code-examples +``` \ No newline at end of file diff --git a/data-downloads/0-getting-started.ipynb b/data-downloads/0-getting-started.ipynb new file mode 100644 index 0000000..8a14377 --- /dev/null +++ b/data-downloads/0-getting-started.ipynb @@ -0,0 +1,800 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting Started With PatentsView Data Downloads\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Table of contents** \n", + "- 1. [Overview](#toc1_) \n", + "- 2.
[Working With GUI Applications](#toc2_) \n", + "- 3. [Working With a Command Line Interface](#toc3_) \n", + "- 4. [Working With Python](#toc4_) \n", + " - 4.1. [Using Pandas](#toc4_1_) \n", + " - 4.2. [Faster Processing with PyDuckDB](#toc4_2_) \n", + "- 5. [Working With R](#toc5_) \n", + " - 5.1. [Using Vroom](#toc5_1_) \n", + " - 5.2. [Using DuckPlyR](#toc5_2_) \n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. [Overview](#toc0_)\n", + "\n", + "[PatentsView](https://patentsview.org) provides downloadable patent data as a set of tab-separated values (tsv) table files. This includes data on patent grants and data on pre-grant publications.\n", + "\n", + "For example, on [PatentsView's Data Downloads](https://patentsview.org/download/data-download-tables) page, you can find a link to the [`g_patent`](https://s3.amazonaws.com/data.patentsview.org/download/g_patent.tsv.zip) table that contains \"data on granted patents.\" The [data dictionary page](https://patentsview.org/download/data-download-dictionary) explains the contents of this table:\n", + "\n", + "| Data Element Name | Definition | Example | Years Present | Type |\n", + "|-------------------|---------------------------------------------------------------------------|-----------------------------|---------------|---------------|\n", + "| patent_id | patent this record corresponds to | 3930271 | all | varchar(20) |\n", + "| patent_type | category of patent. Usually \"design\", \"reissue\", etc. 
| utility | all | varchar(100) |\n", + "| patent_date | date when patent was granted | 1/6/1976 | all | date |\n", + "| patent_title | title of patent | Golf glove | all | mediumtext |\n", + "| wipo_kind | WIPO document kind codes (http://www.uspto.gov/learning-and-resources/support-centers/electronic-business-center/kind-codes-included-uspto-patent) | A | all | varchar(10) |\n", + "| num_claims | number of claims | 4 | all | int(11) |\n", + "| withdrawn | whether a patent has been withdrawn or not (withdrawn = 1, not withdrawn = 0) | 0 | all | int(11) |\n", + "| filename | name of the raw data file where patent information is parsed from | pftaps19760106_wk01.zip | all | varchar(120) |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. [Working With GUI Applications](#toc0_)\n", + "\n", + "PatentsView's data downloads range in size from a few MB to multiple GB. Many have millions of rows. To quickly preview these files, we recommend using [Tad Viewer](https://www.tadviewer.com/), a [DuckDB](https://duckdb.org/)-based desktop application to view tabular data.\n", + "\n", + "Microsoft Excel can be used, but only up to ~1M rows can be loaded at a time. Use [Microsoft PowerQuery](https://learn.microsoft.com/en-us/power-query/) to load and transform a selected subset of data into Excel for analysis.\n", + "\n", + "You can also use more specialized database software such as [DBeaver](https://dbeaver.io/) or [Datagrip](https://www.jetbrains.com/datagrip/) to load and view tables using your preferred database system." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. [Working With a Command Line Interface](#toc0_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download and unzip this table through your browser or using the commands below (using GNU's [wget](https://www.gnu.org/software/wget/))."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "File ‘g_patent.tsv.zip’ already there; not retrieving.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: g_patent.tsv.zip\n" + ] + } + ], + "source": [ + "%%bash\n", + "wget --no-clobber https://s3.amazonaws.com/data.patentsview.org/download/g_patent.tsv.zip\n", + "unzip -n g_patent.tsv.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PatentsView uses tabs (`\\t`) as a field separator, and the double-quote character (`\"`) to enclose the contents of non-numeric fields. View the first two lines of `g_patent.tsv` file to validate:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"patent_id\"\t\"patent_type\"\t\"patent_date\"\t\"patent_title\"\t\"wipo_kind\"\t\"num_claims\"\t\"withdrawn\"\t\"filename\"\n", + "\"10000000\"\t\"utility\"\t\"2018-06-19\"\t\"Coherent LADAR using intra-pixel quadrature detection\"\t\"B2\"\t20\t0\t\"ipg180619.xml\"\n" + ] + } + ], + "source": [ + "%%bash\n", + "head -n 2 g_patent.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We recommend DuckDB as an embedded analytical database engine. It can be paired with [Harlequin](https://harlequin.sh) or another GUI for a user-friendly experience.
Here is a read-in example:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌───────────┬─────────────┬─────────────┬─────────────────────────┬───────────┬────────────┬───────────┬───────────────┐\n", + "│ patent_id │ patent_type │ patent_date │ patent_title │ wipo_kind │ num_claims │ withdrawn │ filename │\n", + "│ varchar │ varchar │ varchar │ varchar │ varchar │ varchar │ varchar │ varchar │\n", + "├───────────┼─────────────┼─────────────┼─────────────────────────┼───────────┼────────────┼───────────┼───────────────┤\n", + "│ 10000000 │ utility │ 2018-06-19 │ Coherent LADAR using … │ B2 │ 20 │ 0 │ ipg180619.xml │\n", + "│ 10000001 │ utility │ 2018-06-19 │ Injection molding mac… │ B2 │ 12 │ 0 │ ipg180619.xml │\n", + "│ 10000002 │ utility │ 2018-06-19 │ Method for manufactur… │ B2 │ 9 │ 0 │ ipg180619.xml │\n", + "│ 10000003 │ utility │ 2018-06-19 │ Method for producing … │ B2 │ 18 │ 0 │ ipg180619.xml │\n", + "│ 10000004 │ utility │ 2018-06-19 │ Process of obtaining … │ B2 │ 6 │ 0 │ ipg180619.xml │\n", + "└───────────┴─────────────┴─────────────┴─────────────────────────┴───────────┴────────────┴───────────┴───────────────┘\n" + ] + } + ], + "source": [ + "%%bash \n", + "duckdb -c \"SELECT * FROM read_csv('g_patent.tsv', delim='\\t', all_varchar=true) LIMIT 5;\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All data downloads are listed and documented on PatentsView's website. For programmatic use, we list tables and download urls in the [`sources.yml`](sources.yml) file.
For example, here is a minimal subset of tables for granted patents that we will use in examples (here using [nushell](https://www.nushell.sh/) to select from the yaml file):" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "╭───┬──────────────────────────────╮\n", + "│ 0 │ g_patent │\n", + "│ 1 │ g_location_not_disambiguated │\n", + "│ 2 │ g_assignee_not_disambiguated │\n", + "│ 3 │ g_inventor_not_disambiguated │\n", + "╰───┴──────────────────────────────╯\n" + ] + } + ], + "source": [ + "%%bash\n", + "nu -c \"open sources.yml | get granted.minimal.tables\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. [Working With Python](#toc0_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1. [Using Pandas](#toc0_)\n", + "\n", + "Using Pandas, you can load tables from disk or directly from their URL. Here is an example, loading the first five rows of the remote table:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
patent_idpatent_typepatent_datepatent_titlewipo_kindnum_claimswithdrawnfilename
010000000utility2018-06-19Coherent LADAR using intra-pixel quadrature de...B2200ipg180619.xml
110000001utility2018-06-19Injection molding machine and mold thickness c...B2120ipg180619.xml
210000002utility2018-06-19Method for manufacturing polymer film and co-e...B290ipg180619.xml
310000003utility2018-06-19Method for producing a container from a thermo...B2180ipg180619.xml
410000004utility2018-06-19Process of obtaining a double-oriented film, c...B260ipg180619.xml
\n", + "
" + ], + "text/plain": [ + " patent_id patent_type patent_date \\\n", + "0 10000000 utility 2018-06-19 \n", + "1 10000001 utility 2018-06-19 \n", + "2 10000002 utility 2018-06-19 \n", + "3 10000003 utility 2018-06-19 \n", + "4 10000004 utility 2018-06-19 \n", + "\n", + " patent_title wipo_kind num_claims \\\n", + "0 Coherent LADAR using intra-pixel quadrature de... B2 20 \n", + "1 Injection molding machine and mold thickness c... B2 12 \n", + "2 Method for manufacturing polymer film and co-e... B2 9 \n", + "3 Method for producing a container from a thermo... B2 18 \n", + "4 Process of obtaining a double-oriented film, c... B2 6 \n", + "\n", + " withdrawn filename \n", + "0 0 ipg180619.xml \n", + "1 0 ipg180619.xml \n", + "2 0 ipg180619.xml \n", + "3 0 ipg180619.xml \n", + "4 0 ipg180619.xml " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Use `dtype=str` to avoid erroneous type inference, e.g. inferring `patent_id` as a number rather than a string.\n", + "pd.read_csv('https://s3.amazonaws.com/data.patentsview.org/download/g_patent.tsv.zip', delimiter=\"\\t\", dtype=str, nrows=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2. [Faster Processing with PyDuckDB](#toc0_)\n", + "\n", + "As a faster, more memory-efficient alternative than Pandas, we recommend using [DuckDB](https://duckdb.org/). It can be used directly, through its [Python client API](https://duckdb.org/docs/api/python/overview.html), or via [Ibis](https://ibis-project.org/) for a Pandas-like experience. Here is basic usage using the Python client API:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
patent_idpatent_typepatent_datepatent_titlewipo_kindnum_claimswithdrawnfilename
010000000utility2018-06-19Coherent LADAR using intra-pixel quadrature de...B2200ipg180619.xml
110000001utility2018-06-19Injection molding machine and mold thickness c...B2120ipg180619.xml
210000002utility2018-06-19Method for manufacturing polymer film and co-e...B290ipg180619.xml
310000003utility2018-06-19Method for producing a container from a thermo...B2180ipg180619.xml
410000004utility2018-06-19Process of obtaining a double-oriented film, c...B260ipg180619.xml
\n", + "
" + ], + "text/plain": [ + " patent_id patent_type patent_date \\\n", + "0 10000000 utility 2018-06-19 \n", + "1 10000001 utility 2018-06-19 \n", + "2 10000002 utility 2018-06-19 \n", + "3 10000003 utility 2018-06-19 \n", + "4 10000004 utility 2018-06-19 \n", + "\n", + " patent_title wipo_kind num_claims \\\n", + "0 Coherent LADAR using intra-pixel quadrature de... B2 20 \n", + "1 Injection molding machine and mold thickness c... B2 12 \n", + "2 Method for manufacturing polymer film and co-e... B2 9 \n", + "3 Method for producing a container from a thermo... B2 18 \n", + "4 Process of obtaining a double-oriented film, c... B2 6 \n", + "\n", + " withdrawn filename \n", + "0 0 ipg180619.xml \n", + "1 0 ipg180619.xml \n", + "2 0 ipg180619.xml \n", + "3 0 ipg180619.xml \n", + "4 0 ipg180619.xml " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import duckdb\n", + "\n", + "con = duckdb.connect(database=':memory:')\n", + "con.read_csv('g_patent.tsv', delimiter='\\t', dtype=[\"string\"]*8).limit(5).df()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To directly read a remote zip file with DuckDB's Python client, you need some utilities:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
patent_idpatent_typepatent_datepatent_titlewipo_kindnum_claimswithdrawnfilename
010000000utility2018-06-19Coherent LADAR using intra-pixel quadrature de...B2200ipg180619.xml
110000001utility2018-06-19Injection molding machine and mold thickness c...B2120ipg180619.xml
210000002utility2018-06-19Method for manufacturing polymer film and co-e...B290ipg180619.xml
310000003utility2018-06-19Method for producing a container from a thermo...B2180ipg180619.xml
410000004utility2018-06-19Process of obtaining a double-oriented film, c...B260ipg180619.xml
\n", + "
" + ], + "text/plain": [ + " patent_id patent_type patent_date \\\n", + "0 10000000 utility 2018-06-19 \n", + "1 10000001 utility 2018-06-19 \n", + "2 10000002 utility 2018-06-19 \n", + "3 10000003 utility 2018-06-19 \n", + "4 10000004 utility 2018-06-19 \n", + "\n", + " patent_title wipo_kind num_claims \\\n", + "0 Coherent LADAR using intra-pixel quadrature de... B2 20 \n", + "1 Injection molding machine and mold thickness c... B2 12 \n", + "2 Method for manufacturing polymer film and co-e... B2 9 \n", + "3 Method for producing a container from a thermo... B2 18 \n", + "4 Process of obtaining a double-oriented film, c... B2 6 \n", + "\n", + " withdrawn filename \n", + "0 0 ipg180619.xml \n", + "1 0 ipg180619.xml \n", + "2 0 ipg180619.xml \n", + "3 0 ipg180619.xml \n", + "4 0 ipg180619.xml " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from zipfile import ZipFile\n", + "from urllib.request import urlopen\n", + "from io import BytesIO\n", + "\n", + "def zipfile_from_url(filename: str, zipped_url: str):\n", + " \"\"\"Open file from remote ZIP archive.\"\"\"\n", + " with urlopen(zipped_url) as remote:\n", + " with ZipFile(BytesIO(remote.read())) as file:\n", + " return file.open(filename)\n", + "\n", + "remote_table = zipfile_from_url(\"g_patent.tsv\", \"https://s3.amazonaws.com/data.patentsview.org/download/g_patent.tsv.zip\")\n", + "con.read_csv(remote_table, delimiter='\\t', dtype=[\"string\"]*8).limit(5).df()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. [Working With R](#toc0_)\n", + "\n", + "Using R is similar to Python. First, we'll load the rpy2 extension to execute R code in this notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext rpy2.ipython\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.1. [Using Vroom](#toc0_)\n", + "The vroom package does not support reading remote compressed files, but it can read local zip files." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rows: 8980130 Columns: 8\n", + "── Column specification ────────────────────────────────────────────────────────\n", + "Delimiter: \"\\t\"\n", + "chr (5): patent_id, patent_type, patent_title, wipo_kind, filename\n", + "dbl (2): num_claims, withdrawn\n", + "date (1): patent_date\n", + "\n", + "ℹ Use `spec()` to retrieve the full column specification for this data.\n", + "ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.\n", + "# A tibble: 8,980,130 × 8\n", + " patent_id patent_type patent_date patent_title wipo_kind num_claims withdrawn\n", + " \n", + " 1 10000000 utility 2018-06-19 Coherent LA… B2 20 0\n", + " 2 10000001 utility 2018-06-19 Injection m… B2 12 0\n", + " 3 10000002 utility 2018-06-19 Method for … B2 9 0\n", + " 4 10000003 utility 2018-06-19 Method for … B2 18 0\n", + " 5 10000004 utility 2018-06-19 Process of … B2 6 0\n", + " 6 10000005 utility 2018-06-19 Article vac… B2 4 0\n", + " 7 10000006 utility 2018-06-19 Thermoformi… B2 8 0\n", + " 8 10000007 utility 2018-06-19 PEX expandi… B2 24 0\n", + " 9 10000008 utility 2018-06-19 Bracelet mo… B2 11 0\n", + "10 10000009 utility 2018-06-19 Sterile env… B2 21 0\n", + "# ℹ 8,980,120 more rows\n", + "# ℹ 1 more variable: filename \n", + "# ℹ Use `print(n = ...)` to see more rows\n" + ] + } + ], + "source": [ + "%%R\n", + "library(vroom)\n", + "\n", + "# Vroom properly infers data types in this case.\n", + 
"vroom(\"g_patent.tsv.zip\", delim = \"\\t\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.2. [Using DuckPlyR](#toc0_)\n", + "\n", + "We recommend using the Tidyverse's [DuckPlyR](https://duckplyr.tidyverse.org/) as a user-friendly R client API for DuckDB." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "duckplyr: materializing\n", + "# A tibble: 8,980,130 × 8\n", + " patent_id patent_type patent_date patent_title wipo_kind num_claims withdrawn\n", + " \n", + " 1 10000000 utility 2018-06-19 Coherent LA… B2 20 0 \n", + " 2 10000001 utility 2018-06-19 Injection m… B2 12 0 \n", + " 3 10000002 utility 2018-06-19 Method for … B2 9 0 \n", + " 4 10000003 utility 2018-06-19 Method for … B2 18 0 \n", + " 5 10000004 utility 2018-06-19 Process of … B2 6 0 \n", + " 6 10000005 utility 2018-06-19 Article vac… B2 4 0 \n", + " 7 10000006 utility 2018-06-19 Thermoformi… B2 8 0 \n", + " 8 10000007 utility 2018-06-19 PEX expandi… B2 24 0 \n", + " 9 10000008 utility 2018-06-19 Bracelet mo… B2 11 0 \n", + "10 10000009 utility 2018-06-19 Sterile env… B2 21 0 \n", + "# ℹ 8,980,120 more rows\n", + "# ℹ 1 more variable: filename \n", + "# ℹ Use `print(n = ...)` to see more rows\n" + ] + }, + { + "data": { + "text/plain": [ + "The duckplyr package is configured to fall back to dplyr when it encounters an\n", + "incompatibility. Fallback events can be collected and uploaded for analysis to\n", + "guide future development. 
By default, no data will be collected or uploaded.\n", + "→ Run `duckplyr::fallback_sitrep()` to review the current settings.\n", + "✔ Overwriting dplyr methods with duckplyr methods.\n", + "ℹ Turn off with `duckplyr::methods_restore()`.\n", + "\n", + "Attachement du package : ‘duckplyr’\n", + "\n", + "Les objets suivants sont masqués depuis ‘package:stats’:\n", + "\n", + " filter, lag\n", + "\n", + "Les objets suivants sont masqués depuis ‘package:base’:\n", + "\n", + " intersect, setdiff, setequal, union\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%R\n", + "library(\"duckplyr\")\n", + "\n", + "# Use \"string\" type to avoid erroneous type inference, such as inferring `patent_id` as a number rather than a string.\n", + "duckplyr_df_from_csv(\"g_patent.tsv\", options=list(delim=\"\\t\", all_varchar=TRUE))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pv-code-examples", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data-downloads/sources.yml b/data-downloads/sources.yml new file mode 100644 index 0000000..5f3a2cf --- /dev/null +++ b/data-downloads/sources.yml @@ -0,0 +1,121 @@ +# Last updated: December 2024 + +granted: + minimal: + database: https://s3.amazonaws.com/data.patentsview.org/download + url_template: "{database}/{table}.tsv.zip" + tables: + - g_patent + - g_location_not_disambiguated + - g_assignee_not_disambiguated + - g_inventor_not_disambiguated + downloads: + database: https://s3.amazonaws.com/data.patentsview.org/download + url_template: "{database}/{table}.tsv.zip" + tables: + - g_patent + - g_location_not_disambiguated + - g_assignee_not_disambiguated + - 
g_inventor_not_disambiguated + - g_applicant_not_disambiguated + - g_application + - g_assignee_disambiguated + - g_attorney_disambiguated + - g_attorney_not_disambiguated + - g_botanic + - g_cpc_at_issue + - g_cpc_current + - g_cpc_title + - g_examiner_not_disambiguated + - g_figures + - g_foreign_citation + - g_foreign_priority + - g_gov_interest + - g_gov_interest_contracts + - g_gov_interest_org + - g_inventor_disambiguated + - g_ipc_at_issue + - g_location_disambiguated + - g_other_reference + - g_patent_abstract + - g_pct_data + - g_persistent_assignee + - g_persistent_inventor + - g_rel_app_text + - g_us_application_citation + - g_us_patent_citation + - g_us_rel_doc + - g_us_term_of_grant + - g_uspc_at_issue + - g_wipo_technology + brief_summary: + database: https://s3.amazonaws.com/data.patentsview.org/brief-summary-text + url_template: "{database}/g_brf_sum_text_{table}.tsv.zip" + tables: &years-granted [1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] + claims: + database: https://s3.amazonaws.com/data.patentsview.org/claims + url_template: "{database}/g_claims_{table}.tsv.zip" + tables: *years-granted + detail_description: + database: https://s3.amazonaws.com/data.patentsview.org/detail-description-text + url_template: "{database}/g_detail_desc_text_{table}.tsv.zip" + tables: *years-granted + drawing_description: + database: https://s3.amazonaws.com/data.patentsview.org/draw-description-text + url_template: "{database}/g_draw_desc_text_{table}.tsv.zip" + tables: *years-granted + +pre-grant: + minimal: + database: https://s3.amazonaws.com/data.patentsview.org/pregrant_publications + url_template: "{database}/{table}.tsv.zip" + tables: + - pg_published_application + - pg_applicant_not_disambiguated + - 
pg_assignee_not_disambiguated + - pg_inventor_not_disambiguated + downloads: + database: https://s3.amazonaws.com/data.patentsview.org/pregrant_publications + url_template: "{database}/{table}.tsv.zip" + tables: + - pg_published_application + - pg_applicant_not_disambiguated + - pg_assignee_not_disambiguated + - pg_inventor_not_disambiguated + - pg_assignee_disambiguated + - pg_cpc_at_issue + - pg_cpc_current + - pg_cpc_title + - pg_foreign_priority + - pg_gov_interest + - pg_gov_interest_contracts + - pg_gov_interest_org + - pg_granted_pgpubs_crosswalk + - pg_inventor_disambiguated + - pg_ipc_at_issue + - pg_location_disambiguated + - pg_location_not_disambiguated + - pg_pct_data + - pg_persistent_assignee + - pg_persistent_inventor + - pg_published_application_abstract + - pg_rel_app_text + - pg_us_rel_doc + - pg_uspc_at_issue + - pg_wipo_technology + brief_summary: + database: https://s3.amazonaws.com/data.patentsview.org/pregrant_publications + url_template: "{database}/pg_brf_sum_text_{table}.tsv.zip" + tables: &years-pre-grant [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] + claims: + database: https://s3.amazonaws.com/data.patentsview.org/pregrant_publications + url_template: "{database}/pg_claims_{table}.tsv.zip" + tables: *years-pre-grant + detail_description: + database: https://s3.amazonaws.com/data.patentsview.org/pregrant_publications + url_template: "{database}/pg_detail_desc_text_{table}.tsv.zip" + tables: *years-pre-grant + drawing_description: + database: https://s3.amazonaws.com/data.patentsview.org/pregrant_publications + url_template: "{database}/pg_draw_desc_text_{table}.tsv.zip" + tables: *years-pre-grant diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..b42daf4 --- /dev/null +++ b/environment.yml @@ -0,0 +1,35 @@ +name: pv-code-examples + +channels: + - conda-forge + +dependencies: + # System dependencies + - 
nushell=0.99 + - duckdb-cli=1.1 + + # R dependencies + - r-base=4.4 + - r-tidyverse=2.0 + - r-vroom=1.6 + - r-duckplyr=0.4 + + # Python dependencies + - python=3.13 + - pip + - pip: + - jupyter==1.1.* + - ipykernel==6.29.* + - pandas==2.2.* + - duckdb==1.1.* + - fsspec==2024.10.* + - pyyaml==6.0.* + + # R in Jupyter notebook + - rpy2==3.5.* + + # Development + - black + - isort + - pytest + - testbook \ No newline at end of file diff --git a/patent.Rmd b/patent.Rmd deleted file mode 100644 index a29f4b6..0000000 --- a/patent.Rmd +++ /dev/null @@ -1,29 +0,0 @@ ---- - title: "read in patent.tsv and summarize" -output: html_document ---- - #### Note: This HTML document was last generated on `r Sys.Date()` - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -library(data.table) -#TODO: change directory as needed -setwd("") -``` -##Loads in data - -```{r load data} -data<- read.delim(unz("patent.tsv.zip", "patent.tsv"), header=TRUE, sep ="\t", comment.char="#", stringsAsFactors = FALSE, quote="", fill = TRUE) -``` - -## displays summary of data - -```{r view data} -head(data) -nrow(data) -ncol(data) -str(data) -summary(data) - - -``` diff --git a/tests/test_data_downloads.py b/tests/test_data_downloads.py new file mode 100644 index 0000000..f6b993f --- /dev/null +++ b/tests/test_data_downloads.py @@ -0,0 +1,29 @@ +import yaml +import subprocess +import pytest +from testbook import testbook + +# Change the working directory to the Data Downloads folder for all tests in this file. +@pytest.fixture(autouse=True) +def change_test_dir(monkeypatch): + monkeypatch.chdir('data-downloads') + + +@testbook('data-downloads/0-getting-started.ipynb', execute=True) +def test_0_introduction(nb): + ... # Just check that notebook runs without error. 
def test_sources():
    """Check that every download URL described by ``sources.yml`` is reachable.

    Loads ``sources.yml`` from the current working directory, expands each
    source's ``url_template`` with its ``database`` and every entry of its
    ``tables`` list, and sends a ``curl --head --fail`` request to the result.
    Because ``check=True`` is passed, the first non-zero curl exit status
    raises ``subprocess.CalledProcessError`` and the test stops there.
    """
    with open("sources.yml") as handle:
        catalog = yaml.safe_load(handle)

    # Resolve both top-level sections up front, before any request is sent.
    sections = (catalog['granted'], catalog['pre-grant'])

    for section in sections:
        for entry in section.values():
            base = entry['database']
            template = entry['url_template']
            for name in entry['tables']:
                target = template.format(database=base, table=name)

                print("Checking ", target)
                subprocess.run(['curl', '--head', '--fail', target], check=True)