diff --git a/assets/specs/model-csv-to-pmml/flow-charts/beta-coefficients-file.drawio b/assets/specs/model-csv-to-pmml/flow-charts/beta-coefficients-file.drawio new file mode 100644 index 0000000..f5338bf --- /dev/null +++ b/assets/specs/model-csv-to-pmml/flow-charts/beta-coefficients-file.drawio @@ -0,0 +1 @@ +7Zpbc6IwFMc/jY+7w03Ux9a17c62M059aF8jHCG7IbEheNlPvwGCiFpA7VboONPphMMhl3Pyy1+OdsxhsLrnaO4/MRdIx9DcVcf80TEM3TKMTvynuevU0rOVwePYVU65YYL/gjJqyhphF8KCo2CMCDwvGh1GKTiiYEOcs2XRbcZIcdQ58mDPMHEQ2be+YFf4qbVv9HL7A2DPz0bW7UF6J0CZs1pJ6COXLbdM5qhjDjljIm0FqyGQOHhZXNLn7t65u5kYByrqPDAYmC/ryevDmIS/fr6+Pc/Ct/W3btrLApFILVhNVqyzCIArA6IuGRc+8xhFZJRbbzmLqAvxMJq8yn0eGZtLoy6Nv0GItcouigSTJl8ERN3dX4paXcgi7kDJ/G21JRD3QJT49VO/eC1bA6hA3QMLQPC1dOBAkMCLYvKR2kPexi8Ps2yoSB8RdXsv6jeuGw/UiXNoE7mS26lseHFjjDiSwwJ/xKHYcpAjb3yo5C4hI/7nw6Fu7oECR+QZPA5hiBnNWH2/v52NkKc5ztnSxwImc5SkZynhL6YUTUNGIgE33FFpT6z5lSUvQ8HZnw1XRtlOWAAXsCrNnbprGlb6iDpxuoq/ZY6v3lc2fxtdTftP6e61HLJ+TciyM7shlPVLKKNRMAUuG2xWylwJH2EFcEdwC28RIsXuihNcII7RlCRjYpo44bg9w3JvGMM4rySKDYxConJ80xGmcgoOzEVrebaM790C0ZsPB9tIW+ZnIj1oOdJZBKuZNhrFdDbvetJ5hxzBrrp5mm6avYsLp67vZbtlmBl1MbOahZlRVzttFMQ7mE7DeRKpXRHk4OIYwjOEtC7FlSrqIAEe49hJ3A5oKmwkdXdZXwPprjG4ONJm25G26iJtNwtp6xjlHLIEDwFX8axJWvEjqj04IJ693qeS1vbajl63uKM3q7qjl5V3ql48P0Avj2C3WjIZFZhGLHm5LFXMtnJbVMj+5atFetvLRXrdelFWGW8Kt2UFo31Sx09IcLy6iuPRkOmabmYlnQty1vYaToZPNWfNquFk8z5JH8dqhJOrsrWwPbMg+0Uw1S9fATLaXgHK5lsNabMqQNm8j/iO8qqHp4JmmpcHraw8UHUon3smb++e/S52q3FnnNH560rd78w02Qq+zEazDpUlPmqjycv81yzJva3fBJmjfw== \ No newline at end of file diff --git a/assets/specs/model-csv-to-pmml/flow-charts/fine-and-gray-step.drawio b/assets/specs/model-csv-to-pmml/flow-charts/fine-and-gray-step.drawio index e565075..01178ab 100644 --- a/assets/specs/model-csv-to-pmml/flow-charts/fine-and-gray-step.drawio +++ b/assets/specs/model-csv-to-pmml/flow-charts/fine-and-gray-step.drawio @@ -1 +1 @@ 
-7VzbcqM4EP0aPyZlrrYfE+cyWzXZ8kxma2eetmSQQTuAvELE9nz9SlzMRRgrY+KgJFWpFDQCpJZOn+5W45ExD7f3BKz9B+zCYKSP3e3IuBnpumbq+oj/jd1dJpnYucAjyM0blYJH9AvmwnEuTZAL41pDinFA0boudHAUQYfWZIAQvKk3W+Gg/tY18KAgeHRAIEr/Ri71M+lUn5TyTxB5fvFmzZ5lV0JQNM5HEvvAxZuKyLgdGXOCMc2Owu0cBlx5hV6y++4OXN13jMCIytywnC5+Jn99WURfzDuAnu6/kh/WRdG5JxAk+Yjz3tJdoQLoMo3kp5hQH3s4AsFtKb0mOIlcyN8zZmdlm88Yr5lQY8J/IaW7fHpBQjET+TQM8qviWPLhxTghDuwYQLEmAPEg7Rpo1o6PpfKCXFP3EIeQkh1rQGAAKHqqzz7IF5G3b1fqmR3kqn6G2jVB69fQQ5Gg+pgS/HO/8PS60tiCWvN24dbj2LtcBXjj+IDQy5ipg/7Dm298ROHjGqQ63LBmXcp+goTCbad68quGbWa35Ki28mW0KSGijXOZX4VHIexdo+ZU8YWsSy5kczKolawLar9yXf6iETctdsCGcr1kBx4/uIcRJCD4Cj0C4xjhqOCLoiXrwr5xxK6l1p7/8/nh94fPnEmwk4Rcvc3pLSdPO7DyKxMFljEOEgqviJNPZiotz0yOMAF+fWBHq2NnT3UV8OjnBY8uzqJa4DEkwVOofijgsd6J3oeldkPQ+h/x3sisUAC/7dawzX4tIQUXDoarFXIQUxbr2F2b6eqLx13oIG4mX5DIG8bInr2+MbIVB4UpCQprUKAwn0PkC0AAey0kn1FMpfm7D39ATc63JuOjMNOm54XZRHGYWZIwswcFM5HxS5hFSbiEhB3gVSfoOgASH0HcM4AL/0tAUH9cvYNPgCCwDNJ38tiVNUJxzqAjfc6nNUi4AEcpsWKyfxCKWBccuFbWiZ9qEoA2jbMCWvUI2JYE9LACYNFb6eDNO+BQ/EGachjTxjIgOzNrzhQH2UQSZNNBgUz0VQ6wpg1Cvn6jZZwFSE36I9BFHIInUKgsho/ypwMo9DBBTtqshU2LcFQclqKA3ueJhwNoQ/UNkKkkoGeDArToq3Sw5hyn4KDwgzjlcGbOJHA2mZwVZ+KWl1o4m0nirPBZBgI00V+Rjzd7IMtnQPc4X+KIoijBaUzZSZeKwnZiDI8edcVhW8DxOG61QeFWE/2Srnzs4gFQgrYf5HgcZbPh5WINcUdKMZTJluFo+rBQJnol8vS4yN/w27lYKdCemIZVE6RlUcSAQCrukCkGUl0WpMawQCr6IMe2Jj/YUBZo5gBTMtZr4IrpkOy+V09+8IddWsXpzTZ/eHa2q54tIEHZ1lwmPAGk0qVMvWdo01uvCAG7SoM1RrzOpXzyggsqC2im1xfQxGyUIR+7QRtPx401k3WiXEH70ZxgREQXS57pTyX6qklqqxaqp3dPIP4yBJbdfh2zo1BZ62XLJLrOvEOkvC8vW7/UvwU6jTZUrxuTV/ywKsc00TGWracEMQxQBC988AsQN261jmmJZff+13DLK/V6eeXUsl7bPhmq131psoVf2rAqvzSx9Ose0j1O0k/V6jQt4IUyJ1PGgyg8A/48gsP9AytyF1KAAv7G2IdQ2TqsZmFlK8DOW79sqF6HpckWYmnDqsTSnlWKdc24Z56En1Lm+Vb40R/5gm68NbeohkBoM8Xg1Ttq2gNtbWwfCLT3D8kQnt/XmIY+Ym7psq12eHLX8MTQuxXl/YXgOWm/lbR7c2tsAOjWlc+6yxZoacOq0DLHaur5pc2q0diZKvKdx4zqsxOr5gHrfTCxarZ37GXTqqKzW4n9YwrXI164EyRhVNjM3I7CLc0MaJvZXvGkAIjcCy/t8Rv7yLIsbD/6lWVzcfXnLIuFYbcR5+YFIDGKPHZ0h9LEMUjF99nqfWQzGveleZ56RhHgFWZn+6UKre1z+9YI0X4p1Zt
i0cEPKCqVcoTUNZmqeY4DrrCbiOf1OVSCoCECAfLYcr5xYJRuSV1zdfEy86v8QohcN7XXbUqv2/AepsBqLP/JWJwBs2UC9BebAP19T8B08toTIO6R/Infrv7NZha4xQSdV/+iU/2e9D979fUvpoLfk/41zX7tCRBThW+aAbTGPvl0Jgb2Z50AS/Ww3pT9kLKorxpKWN+VkxPisAcUsVjg0fFhWL3ekh3fIOpXilBOz5BfCutBkSQaC7xrWDNa2EY/bxLNUv13pgoQHUXbsCpXi27/TgI8g94dgoHbgRKeazmapU5/DnJ0sGCsa1e4s4xsnX0gBnkHCYp/jhrbzWEG8c5nZFva1T6kPWvrk6rmQKuZA8sQqVef2v2YA3Za/qBsluMrf5bXuP0f \ No newline at end of file +5Vtbc5s4FP41fkzGgLHxY67uzjS7adPuNk8dGY5BG4G8QsR2f/1KIK7ClyaObZKZTAYOAqTvnO/chHvWVbicMDQP7qgHpGf2vWXPuu6ZpjEwzZ7863urTDIaKoHPsKcGlYIH/AuUsK+kCfYgrg3klBKO53WhS6MIXF6TIcbooj5sRkn9rXPkgyZ4cBHRpf9gjweZ1DFHpfwTYD/I32wMx9mVEOWD1UriAHl0URFZNz3rilHKs6NweQVEgpfjkt13u+ZqMTEGEd/lhqlz/5R8/3IffRncIvw8+coe7bN8cs+IJGrFarZ8lUMAnkBEnVLGA+rTCJGbUnrJaBJ5IN/TF2flmM+UzoXQEMJ/gfOVUi9KOBWigIdEXdXXopYX04S5sGEBuU0g5gPftNBsnFxL5QUKqQnQEDhbiQEMCOL4ua59pIzIL8aVOIsDBfVvwG5oqF+CjyMN+pgz+lQYnlkHTRjUXI4Ll77k3vmM0IUbIMbPYwEH/ymHLwLM4WGOUgwXYtgmsJ+BcVhuhEddtYaD7BbFaluZ0aKkiNFXsqBKj1y4d0QHTscN2dzRkAejk7JkU4P9wvPki3rStQyJWMrlVBz48mACETBEvoLPII4xjfJ4kY8UUygGR+Ja6u3lv0Ae/rj7LCMJdZNQwttUb6k8Y43lVxSFpjElCYcL5iplptLybCAZptFvH9wx6twpQl2FPOZhyWPqWuwWeawdyZNDf3jy3CQk6f9tTrg5ufg55c5f49hpCQPHg70ByRvrwTqmz7I02P+ICyczwwS+rebQ5r+mwNGZS2E2wy4W4IiJ3ba5rn3FcQ9cLN3kGwbyhjMajo/ujCz7GCQQELLVj+rJo3zYuZ2fXi/Vw7OzVfXsHhgWiwemhC8nkLEzg5x9e7L01gvG0KoyYE6xNPPyyfdSUNqPURhHXt8NGlXIthuMvtNv2Ew2idKCitW8IuHWOf997iEOBe/3kqwkMY780pUIIsvSVHJZTjAqrmiO5Ey6HfEwFEpiR9N43uZGOpLh2KO6ituciuEcNsPR9d+tDCf300dwDK/z5sMPA7x9UsDn835BloNiIDiCswD9QsyLWzOcNPHZ7K1ON+kx60mPY9vH9k/WqOs0sXelyfC0aGJrwE+AFzxJG8iybczWJwpc5H7tVUCdIM+IYTQlKf0YDYsHVuSeyAswkW+MA4DOtjiaCUArwQ5cVXS9P5jzZjvBTqtBaOgJwIYO4aWIPVdJ+CmNPN8kK+LNGXelPbiPDL6bfBtZO/DtwAFt3DF67Z01a+rf/nBN/Vs8JGO4uq+hhn2UwnqqURIySsIpyFhHZ+voKVND9Z51PIq3ELOV5dsDqKI8/JcgUn9DfdoqaKt6G8cq3+0qu8e7RNMDl9N6cdExuju7RtPxSUXTQb+bOL+1W7WMOkfyNuQ2p/q7/U5zsMZ7r+t3Nm9o9EffqNupJ7uV2j/mMO/JL2ZIEka5z1R+FJY8c6BtbnsmmwIo8s78dMbvbOvD6O+8Eds0rv0ly2NNcTeRjM33iKnW8q1Qgnx1Kp5k1vsgNBrvC3kOLMQR4qLkPdj3I0bbJnhrhTh8K
+gH+mbsI+igcsmQOpIpzFeUSMCuIxpBShVCGiJEsC/M+doVsMmdoksJF3YRuVAXQux5qb9uA73uw/egArth/qO+roFBiwLMN1OA+bEV4IyOrQB9j+RP+n7xHzS7wC0u6LD460n1R8J/fHT711vBHwl/wxgeWwF6q/BdRwBjXK8NnLFe2B9UAXbXy/r849itZX3+wfKplPWbenJaHXaHI1ELPLgBhNXrLd3xBeZB5bOT13fIzzV76EgTTRTeNa5ZLdHGPGwTzdbjTbfYlpNoK9uskyJbPu2XNMAz6t1iIN4Glshey9YudfojjV65/VtpWW/bFb6SCiOJlNC0G1Ddn54z8LDLQU6Q4fip19huDjOKb3xGtqVdnUM6s7Y5ddUdGDV3YFt66DWd4X7cgTgtf+aV9fjKH8tZN/8D \ No newline at end of file diff --git a/assets/specs/model-csv-to-pmml/flow-charts/linear-regression-step.drawio b/assets/specs/model-csv-to-pmml/flow-charts/linear-regression-step.drawio new file mode 100644 index 0000000..d132b19 --- /dev/null +++ b/assets/specs/model-csv-to-pmml/flow-charts/linear-regression-step.drawio @@ -0,0 +1 @@ +1VhtU+IwEP41fMSBthT8qIico84ozM35zQnt0gbTpJdueblff0lJaWuBwTtUmHEwebJtNs/usxto2P1oOZQkDh+FD6xhtfxlw75pWFbbsayG/mv5qzXSdQ0QSOobowIY0z9gwJZBU+pDUjFEIRjSuAp6gnPwsIIRKcWiajYVrLprTAKoAWOPsDr6i/oYrtGe1S3wH0CDMN+57V6uVyKSG5uTJCHxxaIE2YOG3ZdC4HoULfvANHk5L+vnbnesbhyTwPGQB0aj2/Go+3xFXu8v/c79cHYTOk3zljlhqTmwcRZXOQPgK0LMVEgMRSA4YYMCvZYi5T7obVpqVtg8CBErsK3AGSCuTHRJikJBIUbMrNaPYvxKRCo92OO/cReJDAD32DlrO32W0gaGqCGICFCulIEERpDOq8EnJoeCjV1BsxoYpj/AulVj/RoCymvUJyjF2ybvrCppKp9ibRctAy29iykTCy8kEi8SRQe+avNFSBHGMck4XCizfWTPQSIs99JjVm3X8GlE3TEpvigU0m4ZLCyrIwePzqh95nnsHJrHvZNKZKdG+5Xv640auhi5TB3leqIGgR4MgYMkbASBhCShgufdIrdULmyMuVrLar3+CPXw5fFB9xHhpZGm9314i+C1dyR+KVBkkgiWIlxJzwQzQ4uZowVWU98xpNOuSmfT6Erasb5WO06rFsUB11F8IjKhPFCjB8qBSDUogqcmY4T4WDULQUaUExTy68pW79C65X4a9d1zr1u9AwtXTvWpFK7eRyrXI+VKB2MvhKi8vqVeLSiGupGbmvX/NfDiXAud1a2Kze5uKXS9zyp0/Tv7jj29vtmz2Rs4z/Nlx3lpuicktnc6+Df15aIqq2/ryTvfqbVOvb8UWuNpNAHdWcR0t/JuKTB/j0gSTejvlLDqjaH67uxWnLkhKZmw7KGSVHNY3zEACWVJ9p0NFK9WX8eHpRoRHLKvkXLzYCzBpx6CdlCk6CnWtIUU0cYk0iI/YyW3K0ru2J0tSna/Usmn1DY/quTdAj3k/t/aHqqjK3mflyXaf8Y+QThiv1NLqbl0rl85UWJsegKmU+pRxWvSnNJMpfruqP5l18dvlFZNR1sS4GBpuZfbmuSRpKWmxa9N2VrpNzt78Bc= \ No newline at end of file diff --git 
a/assets/specs/model-csv-to-pmml/images/beta-coefficients-file.png b/assets/specs/model-csv-to-pmml/images/beta-coefficients-file.png new file mode 100644 index 0000000..6675cad Binary files /dev/null and b/assets/specs/model-csv-to-pmml/images/beta-coefficients-file.png differ diff --git a/assets/specs/model-csv-to-pmml/images/fine-and-gray-step.png b/assets/specs/model-csv-to-pmml/images/fine-and-gray-step.png index c4d9326..232b5fc 100644 Binary files a/assets/specs/model-csv-to-pmml/images/fine-and-gray-step.png and b/assets/specs/model-csv-to-pmml/images/fine-and-gray-step.png differ diff --git a/assets/specs/model-csv-to-pmml/images/linear-regression-step.png b/assets/specs/model-csv-to-pmml/images/linear-regression-step.png new file mode 100644 index 0000000..5d04979 Binary files /dev/null and b/assets/specs/model-csv-to-pmml/images/linear-regression-step.png differ diff --git a/assets/specs/model-csv-to-pmml/model-csv-to-pmml-spec.Rmd b/assets/specs/model-csv-to-pmml/model-csv-to-pmml-spec.Rmd index a79b881..2178363 100644 --- a/assets/specs/model-csv-to-pmml/model-csv-to-pmml-spec.Rmd +++ b/assets/specs/model-csv-to-pmml/model-csv-to-pmml-spec.Rmd @@ -88,10 +88,14 @@ model_steps_file_metadata <- data.frame( center;
interaction;
rcs;
- fine-and-gray
", + fine-and-gray;
+ linear-regression;", + "beta-coefficients;
baseline-hazards;
", + "N/A", + "N/A" ), enumLabels = c( @@ -99,10 +103,14 @@ model_steps_file_metadata <- data.frame( Center a set of variables;
Create interaction terms;
Create restricted cubic spline terms;
- Calculate the outcome of a fine and gray model
", + Calculate the outcome of a fine and gray model;
+ Calculate the outcome of a linear regression model;
", + "Used to specify the beta coefficients file for a fine and gray step;
- Used to specify the baseline hazards file for a fine and gray step", + Used to specify the baseline hazards file for a fine and gray step;
", + "N/A", + "N/A" ), restrictions = c("mandatory", "", "mandatory", "") @@ -382,16 +390,7 @@ DT::datatable(eg_interaction_step) The example above creates two new interaction variables, `AgeXCancer` and `AgeXHypertension`. `AgeXCancer` is created using the `Age` and `Cancer` variables while `AgeXHypertension` is created using the `Age` and `Hypertension` variables. Both new interaction variables are continuous. Notice how the interacting variables are seperated by a semi-colon in the example. -## Fine and Gray Model Step Files - -Similar to a cox regression model, a fine and gray model estimates the risk of an event occuring at some time in the future, the difference being that it takes competing risks into account. Specifying this step requires two files, - -1. A beta coefficients file which specifies the names of the covariates in the model and their beta coefficient -2. A baseline hazards file which specifies a time value and the baseline hazard value associated with it - -The metadata for the beta coefficients file is given below: - -```{r echo=FALSE} +```{r} beta_coefficients_step_file_metadata <- data.frame( fileType = c("beta coefficients step", "beta coefficients step", "beta coefficients step"), columnName = c("variable", "coefficient", "type"), @@ -420,7 +419,18 @@ beta_coefficients_step_file_metadata <- data.frame( "mandatory" ) ) +``` + +## Fine and Gray Model Step Files +Similar to a cox regression model, a fine and gray model estimates the risk of an event occuring at some time in the future, the difference being that it takes competing risks into account. Specifying this step requires two files, + +1. A beta coefficients file which specifies the names of the covariates in the model and their beta coefficient +2. 
A baseline hazards file which specifies a time value and the baseline hazard value associated with it + +The metadata for the beta coefficients file is given below: + +```{r echo=FALSE} DT::datatable(beta_coefficients_step_file_metadata, escape = FALSE) ``` @@ -504,6 +514,35 @@ The beta coefficients file says that the model has two covariates, `Age` and `Se The baseline hazards has 5 rows and describes the baseline hazards for each year in the model. Notice how the lowest time value is `1` and the highest value is `5`. This is because of the time variable we specified earlier which goes from 1 to 5. +## Linear Regression Model + +A linear regression model is used to predict a continuous outcome from a set of covariates. Adding this step requires a beta coefficients file which specifies the names of the covariates in the model and their beta coefficient value. The metadata for the beta coefficients file is given below: + +```{r echo=FALSE} +DT::datatable(beta_coefficients_step_file_metadata, escape = FALSE) +``` + +An example model steps file with a linear regression step and the beta coefficients file it references are shown below. + +```{r echo=FALSE} +eg_linear_regression_model_steps_file <- data.frame( + "step" = c("linear-regression"), + "fileType" = c("N/A"), + "filePath" = c("beta-coefficients.csv") +) +DT::datatable(eg_linear_regression_model_steps_file) + +eg_beta_coefficients_csv <- data.frame( + "variable" = c("Age", "Sex"), + "coefficient" = c(0.01, 2) +) +DT::datatable(eg_beta_coefficients_csv) +``` + +The model steps file has a single row for a step to calculate the outcome of a linear regression model. Since this step requires only one file to fully specify it, we can enter in a value of `N/A` in the `fileType` column. The `filePath` column has the path to the file with the covariates and the beta coefficients for the model. Once again the paths should be relative to the model steps file that described them. 
+ +The beta coefficients file says that the model has two covariates, `Age` and `Sex` and specifies the beta coefficients for each one which are `0.01` and `2` respectively. + # Business Logic This section will go through the logic for transforming a set of model CSV files into PMML. The flowchart below broadly outlines the steps. The next few section will go through each step in the flow chart in detail. @@ -894,6 +933,66 @@ For the example CSV file used to introduce this step, the converted PMML file wo ``` +### Linear Regression Step + +This involves converting the beta coefficients file into the XML nodes to add to the document. The steps for this part are: + +![](./images/linear-regression-step.png) +Things to note: + +1. The following attributes need to be added to the `GeneralRegressionModel` + * `modelType` set to `generalLinear` + * `functionName` set to `regression` +2. The following attributes need to be set for each `MiningField` node + * `name`: Set to the name of the starting variable. For the extra node added for the predicted outcome, the value would be `linear_regression_outcome`. + * `usageType`: Set to `active`. For the extra node added for the predicted outcome, the value would be `target`. +3. The following attributes need to be set for each `Parameter` node + * `name` set to `p<index>` where `<index>` is the row number of the variable in the file. For the extra node added for the intercept, this value would be `p0` + * `label` set to the value of the variable column. For the extra node added for the intercept, this value would be `Intercept` +4. The following attributes need to be set for each `Predictor` node + * `name` set to the value of the variable column +5. The following attributes need to be set for each `PPCell` node + * `value` set to `1` + * `predictorName` set to the value in the variable column + * `parameterName` set to the value of the name attribute for the Parameter node for this variable +6. 
The following attributes need to be set for each `PCell` node + * `parameterName`: Set to the name attribute of the Parameter node for this variable. Remember that for the intercept term, this would be `p0` + * `beta`: Set to the value of the coefficient column + +For the example CSV file used to introduce this step, the converted PMML file would be, + +```{xml} + + + + + + + + + + + + + + + + + + + + + + + + + + +``` + # Implementation The skeleton for the exported function to implement the above business logic is given below, diff --git a/assets/specs/model-csv-to-pmml/test-files/linear-regression/beta-coefficients.csv b/assets/specs/model-csv-to-pmml/test-files/linear-regression/beta-coefficients.csv new file mode 100644 index 0000000..14dde07 --- /dev/null +++ b/assets/specs/model-csv-to-pmml/test-files/linear-regression/beta-coefficients.csv @@ -0,0 +1,6 @@ +variable,coefficient,type +ALW_2A1,0.1,cont +DHHGAGE_cont,0.69,cont +ADL_01,0.56,cat +DPS_04,1.54,cat +variable_five,2,cat diff --git a/assets/specs/model-csv-to-pmml/test-files/linear-regression/custom-functions.R b/assets/specs/model-csv-to-pmml/test-files/linear-regression/custom-functions.R new file mode 100644 index 0000000..f77a0d4 --- /dev/null +++ b/assets/specs/model-csv-to-pmml/test-files/linear-regression/custom-functions.R @@ -0,0 +1,3 @@ +variable_five.fun <- function(a, b) { + return(a + b) +} \ No newline at end of file diff --git a/assets/specs/model-csv-to-pmml/test-files/linear-regression/expected-pmml.xml b/assets/specs/model-csv-to-pmml/test-files/linear-regression/expected-pmml.xml new file mode 100644 index 0000000..48c581f --- /dev/null +++ b/assets/specs/model-csv-to-pmml/test-files/linear-regression/expected-pmml.xml @@ -0,0 +1,621 @@ + + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + 1 + + + + + + 10 + + + + 10 + + + + 1 + + + + + + 10 + + + + 10 + + + + + 20 + + + 2 + + + + + 20 + + + + 30 + + + 3 + + + + + 30 + + + + + 40 + + + + 40 + + + + 4 + + + + 996 + + NA::a + + + + + + 997 + + + + 997 + + + + + + 999 + + + + 999 + + + + NA::b + NA::b + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + 13 + + + + 2 + + 17 + + + + 3 + + 22 + + + + 96 + + NA::a + + + + + + 97 + + + + 97 + + + + + + 99 + + + + 99 + + + + NA::b + NA::b + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + 1 + + + + 2 + + 2 + + + + 6 + + NA::a + + + + + + 7 + + + + 7 + + + + + + 9 + + + + 9 + + + + NA::b + NA::b + + + + + + + + + + + + + + + + + + + + + + + + 1 + + 1 + + + + 2 + + 2 + + + + 3 + + 3 + + + + 6 + + NA::a + + + + + + 7 + + + + 7 + + + + + + 9 + + + + 9 + + + + NA::b + NA::b + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/assets/specs/model-csv-to-pmml/test-files/linear-regression/model-export.csv b/assets/specs/model-csv-to-pmml/test-files/linear-regression/model-export.csv new file mode 100644 index 0000000..59d5dc2 --- /dev/null +++ b/assets/specs/model-csv-to-pmml/test-files/linear-regression/model-export.csv @@ -0,0 +1,4 @@ +fileType,filePath +variables,./variables.csv +variable-details,./variable-details.csv +model-steps,./model-steps.csv diff --git a/assets/specs/model-csv-to-pmml/test-files/linear-regression/model-steps.csv b/assets/specs/model-csv-to-pmml/test-files/linear-regression/model-steps.csv new file mode 100644 index 0000000..dba8895 --- /dev/null +++ b/assets/specs/model-csv-to-pmml/test-files/linear-regression/model-steps.csv @@ -0,0 +1,2 @@ +step,fileType,filePath +linear-regression,,./beta-coefficients.csv diff --git a/assets/specs/model-csv-to-pmml/test-files/linear-regression/variable-details.csv b/assets/specs/model-csv-to-pmml/test-files/linear-regression/variable-details.csv new file mode 100644 index 0000000..0ef00ce --- /dev/null +++ b/assets/specs/model-csv-to-pmml/test-files/linear-regression/variable-details.csv @@ -0,0 +1,27 @@ +variable,dummyVariable,typeEnd,databaseStart,variableStart,typeStart,recEnd,numValidCat,catLabel,catLabelLong,units,recStart,catStartLabel,variableStartShortLabel,variableStartLabel,notes +ALW_2A1,N/A,cont,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]",cont,1,N/A,Between 1 and 10 drinks on Sunday,Between 1 and 10 drinks on Sunday,drinks,"[1,10]",Number of drinks on Sunday,DailyConsumptionSunday,Number of drinks on Sunday, +ALW_2A1,N/A,cont,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, 
cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]",cont,2,N/A,Between 10 and 20 (not included) on Sunday,Between 10 and 20 (not included) on Sunday,drinks,"[10,20)",Number of drinks on Sunday,DailyConsumptionSunday,Number of drinks on Sunday, +ALW_2A1,N/A,cont,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]",cont,3,N/A,Between 20 and 30 drinks (not including both ends) on Sunday,Between 20 and 30 drinks (not including both ends) on Sunday,drinks,"(20,30)",Number of drinks on Sunday,DailyConsumptionSunday,Number of drinks on Sunday, +ALW_2A1,N/A,cont,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]",cont,4,N/A,Between 30 (not included) and 40 drinks on Sunday,Between 30 (not included) and 40 drinks on Sunday,drinks,"(30,40]",Number of drinks on Sunday,DailyConsumptionSunday,Number of drinks on Sunday, +ALW_2A1,N/A,cont,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]",cont,NA::a,N/A,not applicable,not applicable,drinks,996,not applicable,DailyConsumptionSunday,Number of drinks on Sunday, +ALW_2A1,N/A,cont,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]",cont,NA::b,N/A,missing,missing,drinks,"[997,999]",don't know (997); refusal (998); not stated (999),DailyConsumptionSunday,Number of drinks on Sunday, +ALW_2A1,N/A,cont,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, 
cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]",cont,NA::b,N/A,missing,missing,drinks,else,else,DailyConsumptionSunday,Number of drinks on Sunday, +DHHGAGE_cont,N/A,cont,"cchs2001_p, cchs2003_p","cchs2001_p::DHHGAGE, cchs2003_p::DHHCGAGE",cat,13,N/A,Age,converted categorical age (12 to 14),years,1,12 to 14 years,Age,Age - (G), +DHHGAGE_cont,N/A,cont,"cchs2001_p, cchs2003_p","cchs2001_p::DHHGAGE, cchs2003_p::DHHCGAGE",cat,17,N/A,Age,converted categorical age (15 to 19),years,2,15 to 19 years,Age,Age - (G), +DHHGAGE_cont,N/A,cont,"cchs2001_p, cchs2003_p","cchs2001_p::DHHGAGE, cchs2003_p::DHHCGAGE",cat,22,N/A,Age,converted categorical age (20 to 24),years,3,20 To 24 Years,Age,Age - (G), +DHHGAGE_cont,N/A,cont,"cchs2001_p, cchs2003_p","cchs2001_p::DHHGAGE, cchs2003_p::DHHCGAGE",cat,NA::a,N/A,not applicable,not applicable,years,96,not applicable,Age,Age - (G), +DHHGAGE_cont,N/A,cont,"cchs2001_p, cchs2003_p","cchs2001_p::DHHGAGE, cchs2003_p::DHHCGAGE",cat,NA::b,N/A,missing,missing,years,"[97,99]",don't know (97); refusal (98); not stated (99),Age,Age - (G),"Not applicable, don't know, refusal, not stated (96-99) were options in CCHS 2003, but had zero responses" +DHHGAGE_cont,N/A,cont,"cchs2001_p, cchs2003_p","cchs2001_p::DHHGAGE, cchs2003_p::DHHCGAGE",cat,NA::b,N/A,missing,missing,years,else,else,Age,Age - (G), +ADL_01,ADL_01_cat2_1,cat,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::RACA_6A, cchs2003_p::RACC_6A, cchs2005_p::RACE_6A, cchs2007_2008_p::RAC_6A, [ADL_01]",cat,1,2,Yes,Yes,N/A,1,Yes,Needs help - preparing meals,"Because of any condition or health problem, do you need the help of another person in preparing meals?", +ADL_01,ADL_01_cat2_2,cat,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, 
cchs2013_2014_p, cchs2014_p","cchs2001_p::RACA_6A, cchs2003_p::RACC_6A, cchs2005_p::RACE_6A, cchs2007_2008_p::RAC_6A, [ADL_01]",cat,2,2,No,No,N/A,2,No,Needs help - preparing meals,"Because of any condition or health problem, do you need the help of another person in preparing meals?", +ADL_01,ADL_01_cat2_NA::a,cat,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::RACA_6A, cchs2003_p::RACC_6A, cchs2005_p::RACE_6A, cchs2007_2008_p::RAC_6A, [ADL_01]",cat,NA::a,2,not applicable,not applicable,N/A,6,not applicable,Needs help - preparing meals,"Because of any condition or health problem, do you need the help of another person in preparing meals?", +ADL_01,ADL_01_cat2_NA::b,cat,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::RACA_6A, cchs2003_p::RACC_6A, cchs2005_p::RACE_6A, cchs2007_2008_p::RAC_6A, [ADL_01]",cat,NA::b,2,missing,missing,N/A,"[7,9]",don't know (7); refusal (8); not stated (9),Needs help - preparing meals,"Because of any condition or health problem, do you need the help of another person in preparing meals?", +ADL_01,ADL_01_cat2_NA::b,cat,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::RACA_6A, cchs2003_p::RACC_6A, cchs2005_p::RACE_6A, cchs2007_2008_p::RAC_6A, [ADL_01]",cat,NA::b,2,missing,missing,N/A,else,else,Needs help - preparing meals,"Because of any condition or health problem, do you need the help of another person in preparing meals?", +DPS_04,DPS_04_cat3_1,cat,"cchs2001_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p",[DPS_04_1],cat,1,3,Every day,Every day,N/A,1,Every day,Sad/depressed - frequency - 2 wk,"How often did you feel this way during those 2 weeks: (every day, almost every day, or less often)?", 
+DPS_04,DPS_04_cat3_2,cat,"cchs2001_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p",[DPS_04_1],cat,2,3,Almost every day,Almost every day,N/A,2,Almost every day,Sad/depressed - frequency - 2 wk,"How often did you feel this way during those 2 weeks: (every day, almost every day, or less often)?", +DPS_04,DPS_04_cat3_3,cat,"cchs2001_p,cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p",[DPS_04_1],cat,3,3,Less often,Less often,N/A,3,Less often,Sad/depressed - frequency - 2 wk,"How often did you feel this way during those 2 weeks: (every day, almost every day, or less often)?", +DPS_04,DPS_04_cat3_NA::a,cat,"cchs2001_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p",[DPS_04_1],cat,NA::a,3,not applicable,not applicable,N/A,6,not applicable,Sad/depressed - frequency - 2 wk,"How often did you feel this way during those 2 weeks: (every day, almost every day, or less often)?", +DPS_04,DPS_04_cat3_NA::b,cat,"cchs2001_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p",[DPS_04_1],cat,NA::b,3,missing,missing,N/A,"[7,9]",don't know (7); refusal (8); not stated (9),Sad/depressed - frequency - 2 wk,"How often did you feel this way during those 2 weeks: (every day, almost every day, or less often)?", +DPS_04,DPS_04_cat3_NA::b,cat,"cchs2001_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p",[DPS_04_1],cat,NA::b,3,missing,missing,N/A,else,else,Sad/depressed - frequency - 2 wk,"How often did you feel this way during those 2 weeks: (every day, almost every day, or less often)?", +variable_five,N/A,cat,cchs2001_p,"DerivedVar::[ADL_01, DPS_04]",N/A,Func::variable_five.fun,1,Category one,Category one,N/A,N/A,N/A,N/A,N/A, +variable_five,N/A,cat,cchs2001_p,"DerivedVar::[ADL_01, DPS_04]",N/A,Func::variable_five.fun,1,Category one,Category one,N/A,N/A,N/A,N/A,N/A, diff --git a/assets/specs/model-csv-to-pmml/test-files/linear-regression/variables.csv 
b/assets/specs/model-csv-to-pmml/test-files/linear-regression/variables.csv new file mode 100644 index 0000000..655add4 --- /dev/null +++ b/assets/specs/model-csv-to-pmml/test-files/linear-regression/variables.csv @@ -0,0 +1,6 @@ +variable,label,labelLong,section,subject,variableType,units,databaseStart,variableStart +ALW_2A1,# of drinks - Sunday,Number of drinks on Sunday,Health behaviour,Alcohol,Continuous,drinks,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::ALCA_5A1, cchs2003_p::ALCC_5A1, cchs2005_p::ALCE_5A1, [ALW_2A1]" +DHHGAGE_cont,Age,Converted categorical age,Demographics,Age,Continuous,Years,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::DHHAGAGE, cchs2003_p::DHHCGAGE, cchs2005_p::DHHEGAGE, [DHHGAGE]" +ADL_01,Help preparing meals,Needs help - preparing meals,Health status,ADL,Categorical,N/A,"cchs2001_p, cchs2003_p, cchs2005_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p, cchs2013_2014_p, cchs2014_p","cchs2001_p::RACA_6A, cchs2003_p::RACC_6A, cchs2005_p::RACE_6A, cchs2007_2008_p::RAC_6A, [ADL_01]" +DPS_04,Sad/depressed - frequency - 2 wk,"How often did you feel this way during those 2 weeks: (every day, almost every day, or less often)?",Health status,Chronic condition,Categorical,N/A,"cchs2001_p, cchs2007_2008_p, cchs2009_2010_p, cchs2010_p, cchs2011_2012_p, cchs2012_p",[DPS_04_1] +variable_five,Variable five,Variable five,N/A,N/A,Categorical,N/A,cchs2001_p,"DerivedVar::[ADL_01, DPS_04]" diff --git a/tests/testthat/test-export_model_to_pmml.R b/tests/testthat/test-export_model_to_pmml.R index 0ea2558..7dce8d2 100644 --- a/tests/testthat/test-export_model_to_pmml.R +++ b/tests/testthat/test-export_model_to_pmml.R @@ -59,4 +59,13 @@ test_that("Correctly converts a fine and gray step file into PMML", { ) 
expect_xml_equal(test_dir, "cchs2001_p", custom_function_files) +}) + +test_that("Correctly converts a linear regression step into PMML", { + test_dir <- "../../assets/specs/model-csv-to-pmml/test-files/linear-regression/" + custom_function_files <- c( + file.path(normalizePath(test_dir), "custom-functions.R") + ) + + expect_xml_equal(test_dir, "cchs2001_p", custom_function_files) }) \ No newline at end of file