From dde89e954588bc1619a43dd2ef7759a6aa54da3d Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 11:17:05 +0530
Subject: [PATCH 01/17] Create twitter-sentiment-analysis.mdx

---
 examples/twitter-sentiment-analysis.mdx | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 examples/twitter-sentiment-analysis.mdx

diff --git a/examples/twitter-sentiment-analysis.mdx b/examples/twitter-sentiment-analysis.mdx
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/examples/twitter-sentiment-analysis.mdx
@@ -0,0 +1 @@
+

From 0312dc2656b7d4cbdca9f6a23909d001cfdce85e Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 11:45:40 +0530
Subject: [PATCH 02/17] Update twitter-sentiment-analysis.mdx

---
 examples/twitter-sentiment-analysis.mdx | 365 ++++++++++++++++++++++++
 1 file changed, 365 insertions(+)

diff --git a/examples/twitter-sentiment-analysis.mdx b/examples/twitter-sentiment-analysis.mdx
index 8b13789..4a96848 100644
--- a/examples/twitter-sentiment-analysis.mdx
+++ b/examples/twitter-sentiment-analysis.mdx
@@ -1 +1,365 @@
+---
title: Getting Started with Twitter Sentiment Analysis Using LSTM
---

## Installation

Pureml SDK & CLI can be directly installed using pip.

```bash
pip install pureml
```

## For additional project requirements we will need to install the following packages

You can use the following command to install the packages.

```bash
pip install numpy==1.23.5 pandas==1.5.3 nltk==3.8.1 keras==2.12.0 tensorflow==2.12.0
```

OR

you can create a `requirements.txt` file with the following contents

```properties
pureml==0.3.8
numpy==1.23.5
pandas==1.5.3
nltk==3.8.1
tensorflow==2.12.0
keras==2.12.0
```

and run the following command

```bash
pip install -r requirements.txt
```

## Download and load your dataset

Download your dataset from [here](https://www.kaggle.com/code/josephassaker/intro-to-deep-learning-sentiment-classification/input).

Start by creating a function to load the dataset into a DataFrame. We will use the @load_data() decorator from PureML SDK.

```python
import pureml
from pureml.decorators import dataset, load_data, transformer, model
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from nltk import WordNetLemmatizer, pos_tag
import re, string
from nltk.tokenize import TweetTokenizer

@load_data()
def load_dataset():
    df_raw = pd.read_csv('data.csv', encoding="ISO-8859-1", header=None)  # Define the path to the CSV file.
    df_raw.columns = ["label", "time", "date", "query", "username", "text"]
    df = df_raw[['label', 'text']]
    df_pos = df[df['label'] == 4]
    df_neg = df[df['label'] == 0]
    df_pos = df_pos.iloc[:int(len(df_pos) / 2048)]  # Keep a small slice of each class; tune the divisor to your compute budget.
    df_neg = df_neg.iloc[:int(len(df_neg) / 2048)]
    df = pd.concat([df_pos, df_neg])
    return df

```

## Preprocess the data

Run the following to download the GloVe word embeddings (prefix the commands with `!` if you run them from a notebook cell):
```bash
wget http://nlp.stanford.edu/data/glove.6B.zip
unzip glove.6B.zip
```
We can add a few more functions to preprocess the data. We will use the @transformer() decorator from PureML SDK.
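The tokenizing, tagging, and lemmatizing steps below rely on a few NLTK resources: the POS tagger, WordNet, and the stop-word list. If you have never downloaded them, a one-time setup along these lines is needed first (a sketch; `STOP_WORDS` is a name the `remove_noise` transformer below expects):

```python
import nltk

# One-time downloads for the corpora used by the transformers below.
nltk.download('averaged_perceptron_tagger')  # used by pos_tag
nltk.download('wordnet')                     # used by WordNetLemmatizer
nltk.download('omw-1.4')                     # WordNet data needed on recent NLTK releases
nltk.download('stopwords')

from nltk.corpus import stopwords
STOP_WORDS = stopwords.words('english')      # consulted when filtering tokens in remove_noise()
```

The preprocessing transformers themselves: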
```python
@transformer()
def read_data(df):
    tk = TweetTokenizer(reduce_len=True)
    data = []
    X = df['text'].tolist()
    Y = df['label'].tolist()
    for x, y in zip(X, Y):
        if y == 4:  # label 4 in the raw data marks a positive tweet
            data.append((tk.tokenize(x), 1))
        else:
            data.append((tk.tokenize(x), 0))
    return data

@transformer()
def cleaned(token):
    # Expand common Twitter shorthand and drop HTML-entity leftovers.
    if token == 'u':
        return 'you'
    if token == 'r':
        return 'are'
    if token == 'some1':
        return 'someone'
    if token == 'yrs':
        return 'years'
    if token == 'hrs':
        return 'hours'
    if token == 'mins':
        return 'minutes'
    if token == 'secs':
        return 'seconds'
    if token == 'pls' or token == 'plz':
        return 'please'
    if token == '2morow':
        return 'tomorrow'
    if token == '2day':
        return 'today'
    if token == '4got' or token == '4gotten':
        return 'forget'
    if token == 'amp' or token == 'quot' or token == 'lt' or token == 'gt' or token == '½25':
        return ''
    return token

@transformer()
def list_to_dict(cleaned_tokens):
    return dict([token, True] for token in cleaned_tokens)

@transformer()
def remove_noise(tweet_tokens):
    cleaned_tokens = []
    for token, tag in pos_tag(tweet_tokens):
        # Strip URLs and @-mentions before lemmatizing.
        token = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|'\
                       '(?:%[0-9a-fA-F][0-9a-fA-F]))+','', token)
        token = re.sub("(@[A-Za-z0-9_]+)","", token)
        if tag.startswith("NN"):
            pos = 'n'
        elif tag.startswith('VB'):
            pos = 'v'
        else:
            pos = 'a'
        lemmatizer = WordNetLemmatizer()
        token = lemmatizer.lemmatize(token, pos)
        cleaned_token = cleaned(token.lower())
        # STOP_WORDS comes from the NLTK setup shown above.
        if cleaned_token not in string.punctuation and len(cleaned_token) > 2 and cleaned_token not in STOP_WORDS:
            cleaned_tokens.append(cleaned_token)
    return cleaned_tokens

@transformer()
def cleaned_token_list(data):
    # Apply remove_noise directly to each token list so token order and
    # duplicates are preserved for the sequence model.
    cleaned_tokens_list = []
    for tokens, label in data:
        cleaned_tokens_list.append((remove_noise(tokens), label))
    return cleaned_tokens_list

@transformer()
def read_glove_vecs(glove_file):
    with open(glove_file, 'r', encoding="utf8") as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)

        # Index words from 1 so that 0 stays free to act as the padding index.
        i = 1
        words_to_index = {}
        index_to_words = {}
        for w in sorted(words):
            words_to_index[w] = i
            index_to_words[i] = w
            i = i + 1
    return words_to_index, index_to_words, word_to_vec_map

@transformer()
def cleared(word):
    # Collapse runs of repeated characters, e.g. 'soooo' -> 'so'.
    res = ""
    prev = None
    for char in word:
        if char == prev: continue
        prev = char
        res += char
    return res

@transformer()
def sentence_to_indices(sentence_words, word_to_index, max_len, i, X):
    # Fills row i of the pre-allocated matrix X in place. Unknown words fall
    # back to their collapsed form, and finally to the 'unk' vector.
    for j, w in enumerate(sentence_words):
        try:
            index = word_to_index[w]
        except KeyError:
            w = cleared(w)
            try:
                index = word_to_index[w]
            except KeyError:
                index = word_to_index['unk']
        X[i, j] = index
```

## Creating a dataset

We can now create a dataset from the pipeline: executing it saves the output of the last transformer. The dataset is created with the `@dataset` decorator, which takes the following arguments:

- `label`: The name of the dataset
- `upload`: If `True`, the dataset will be uploaded to the cloud. If `False`, the dataset will be saved locally.
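Under the hood, the builder below pads every tweet to the length of the longest one: it allocates a zero matrix `X` and lets `sentence_to_indices` fill one row per tweet, so index 0 doubles as padding (GloVe word indices start at 1). A tiny illustration with a hypothetical three-word vocabulary:

```python
# Hypothetical mini-vocabulary; the real indices come from read_glove_vecs().
word_to_index = {'hello': 1, 'unk': 2, 'world': 3}

X = np.zeros((1, 4))  # one sentence, padded to max_len = 4
sentence_to_indices(['hello', 'world'], word_to_index, 4, 0, X)
print(X)  # [[1. 3. 0. 0.]] - the trailing zeros are padding
```

The full dataset builder follows the same pattern at scale: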
```python
@dataset(label='nlpexample_docs:development',upload=True)
def create_dataset():
    df = load_dataset()
    print(f"DF Created. {len(df)}")
    data = read_data(df)
    print(f"Data Created: {len(data)}")
    cleaned_tokens_list = cleaned_token_list(data)
    print(f"Cleaned_token_list Created. {len(cleaned_tokens_list)}")
    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('glove.6B.50d.txt')  # Define the path to the GloVe embedding file
    print(f"GloVe vectors loaded. {word_to_index['hello']}")
    list_len = [len(i) for i, j in cleaned_tokens_list]
    max_len = max(list_len)
    print(f"max_len: {max_len}")
    X = np.zeros((len(cleaned_tokens_list), max_len))
    Y = np.zeros((len(cleaned_tokens_list), ))
    print(f"X & Y Created. {len(X)} & {len(Y)}")
    for i, tk_lb in enumerate(cleaned_tokens_list):
        tokens, label = tk_lb
        sentence_to_indices(tokens, word_to_index, max_len, i, X)
        Y[i] = label
    print(f"{len(X)} & {len(Y)}")
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0, stratify=Y)
    print(f"{len(x_train)} & {len(x_test)} & {len(y_train)} & {len(y_test)}")
    return {"x_train":x_train,"x_test":x_test,"y_train":y_train,"y_test":y_test,"max_len":max_len,"word_to_index":word_to_index,"index_to_word": index_to_word,"word_to_vec_map": word_to_vec_map}

create_dataset()
df = pureml.dataset.fetch(label='nlpexample_docs:development:v1')
x_test = df['x_test']
y_test = df['y_test']
x_train = df['x_train']
y_train = df['y_train']
```

## Creating a model to classify the dataset

With the PureML model module, you can perform a variety of actions related to creating and managing models and branches.
PureML assists you with training and tracking all of your machine learning project information, including ML models and datasets, using semantic versioning and full artifact logging.

We can make a separate python file for the model. The model file will contain the model definition and the training code.
Let's start by adding the required imports.

```python
import numpy as np
import pureml
from keras import Sequential
from keras.layers import Dense, LSTM, Bidirectional
from tensorflow.keras.layers import Embedding
```
```python
# Function that will initialize and populate our embedding layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index, max_len):
    vocab_len = len(word_to_index) + 1  # +1 leaves index 0 for padding
    emb_dim = word_to_vec_map["unk"].shape[0]  # 50 for glove.6B.50d

    emb_matrix = np.zeros((vocab_len, emb_dim))

    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False, input_shape=(max_len,))
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
```

The model training function can be created by using the `@model` decorator. The decorator takes the model name and branch as the argument in the format `model_name:branch_name`.
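Before training, it can be worth sanity-checking the embedding layer in isolation. A quick sketch, assuming the GloVe file and the `read_glove_vecs` transformer from the dataset step are available in this file:

```python
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('glove.6B.50d.txt')
layer = pretrained_embedding_layer(word_to_vec_map, word_to_index, max_len=10)

# One frozen 50-dimensional GloVe row per word index, plus row 0 for padding.
print(layer.get_weights()[0].shape)  # (vocab_len, 50); about 400k rows for glove.6B
```

With that in place, the training function itself: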
```python
df = pureml.dataset.fetch(label='nlpexample_docs:development:v1')  # Fetching the dataset using pureml.dataset.fetch()
x_train = df['x_train']
x_test = df['x_test']
y_train = df['y_train']
y_test = df['y_test']
max_len = df['max_len']

@model(label='nlpexample_docs:model')
def create_model():

    model = Sequential()
    # read_glove_vecs is the transformer defined in the dataset step;
    # import it (or redefine it) in this file before training.
    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('glove.6B.50d.txt')
    model.add(pretrained_embedding_layer(word_to_vec_map, word_to_index, df['max_len']))
    model.add(Bidirectional(LSTM(units=128, return_sequences=True)))
    model.add(Bidirectional(LSTM(units=128, return_sequences=False)))
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(df['x_train'], df['y_train'], validation_data=(df['x_test'], df['y_test']), epochs=1, batch_size=64, shuffle=True)
    return model

create_model()
```

Once ouur training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions

## Let's Now create a predict.py file to store your prediction logic
```python
from pureml import BasePredictor,Input,Output
import pureml
import numpy as np

class Predictor(BasePredictor):
    label = 'nlpexample_docs:model:v1'
    input = Input(type = 'numpy ndarray')
    output = Output(type = 'numpy ndarray')

    def load_models(self):
        self.model = pureml.model.fetch(self.label)

    def predict(self, data):
        prediction = self.model.predict(data)
        threshold = 0.4  # probabilities above this are labelled positive
        prediction = np.where(prediction > threshold,1,0)
        prediction = np.squeeze(prediction)
        return prediction
```

## Add prediction to your model

For registered models, prediction function along with its requirements and resources can be logged to be used for further processes like evaluating and packaging.

PureML predict module has a method add. Here we are using the following arguments:

- `label`: The name of the model (model_name:branch_name:version)
- `paths`: The path to the predict.py file and requirements.txt file.

Our predict.py file has the script to load the model and make predictions. The requirements.txt file has the dependencies required to run the predict.py file.


  {" "}
  You can know more about the prediction process [here](../prediction/versioning){" "}


```python
import pureml

pureml.predict.add(label='nlpexample_docs:model:v1',paths={'predict':'predict.py'})
```

## Create your first Evaluation

PureML has an eval method that runs a _task_type_ on a _label_model_ using a _label_dataset_.

```python
import pureml
pureml.eval(task_type='classification',
            label_dataset='nlpexample_docs:development:v1',
            label_model='nlpexample_docs:model:v1')
```

From f2bd1bbbf62a194daa08731e4c09f07025cce68f Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 12:04:28 +0530
Subject: [PATCH 03/17] Create Paris-House-Dataset.mdx

---
 examples/Paris-House-Dataset.mdx | 202 +++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)
 create mode 100644 examples/Paris-House-Dataset.mdx

diff --git a/examples/Paris-House-Dataset.mdx b/examples/Paris-House-Dataset.mdx
new file mode 100644
index 0000000..a052126
--- /dev/null
+++ b/examples/Paris-House-Dataset.mdx
@@ -0,0 +1,202 @@
+---
title: Regression on Paris Housing Price Dataset
---

## Installation

Pureml SDK & CLI can be directly installed using pip.
```bash
pip install pureml
```

## For additional project requirements we will need to install the following packages

You can use the following command to install the packages.

```bash
pip install torch scikit-learn scikit-image numpy opencv-python xgboost pandas pytorch_tabnet
```

OR

you can create a `requirements.txt` file with the following contents

```properties
pytorch_tabnet
torch
scikit-learn
scikit-image
numpy
opencv-python
xgboost
pandas
```

and run the following command

```bash
pip install -r requirements.txt
```

## Download and load your dataset

Download your dataset from [here](https://www.kaggle.com/competitions/playground-series-s3e6/data).

Start by creating a function to load the dataset into a DataFrame. We will use the @load_data() decorator from PureML SDK.

```python
import pureml
import pandas as pd
from pureml.decorators import load_data,dataset
from sklearn.model_selection import train_test_split


@load_data()
def load_dataset():  # named load_dataset so it does not shadow the load_data decorator
    df = pd.read_csv('data/train.csv') # change the path to your data location
    return df
data = load_dataset()

```

  {" "}If you need functions to pre-process the data then we can use the pureml.decorators.transformer() function{" "}


## Creating a dataset

We can now create a dataset from the pipeline. The dataset will be created by executing the pipeline and saving the output of the last transformer in the pipeline. The dataset can be created by using the `@dataset` decorator. The decorator takes the following arguments:

- `label`: The name of the dataset
- `upload`: If `True`, the dataset will be uploaded to the cloud. If `False`, the dataset will be saved locally.

```python
@dataset(label='Regression:Example3', upload=True)  # label matches the fetch call below
def create_data():
    df = load_dataset()
    features = ['squareMeters', 'numberOfRooms', 'hasYard', 'hasPool', 'cityPartRange', 'cityCode', 'floors',
                'numPrevOwners', 'made', 'isNewBuilt',
                'hasStormProtector', 'basement', 'attic', 'garage', 'hasStorageRoom', 'hasGuestRoom']
    x_train, x_test, y_train, y_test = train_test_split(df[features], df['price'], random_state=42)
    return {"x_train": x_train, "x_test": x_test, "y_train": y_train, "y_test": y_test}

create_data()

```
 {" "} You can fetch the dataset using pureml.dataset.fetch() {""}
```python
df = pureml.dataset.fetch('Regression:Example3:v1')
x_test = df['x_test']
y_test = df['y_test']
x_train = df['x_train']
y_train = df['y_train']
```

## Creating a model to classify the dataset

With the PureML model module, you can perform a variety of actions related to creating and managing models and branches.
PureML assists you with training and tracking all of your machine learning project information, including ML models and datasets, using semantic versioning and full artifact logging.

We can make a separate python file for the model. The model file will contain the model definition and the training code.
Let's start by adding the required imports.

```python
import pureml
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import numpy as np
from pureml.decorators import model
```

The model training function can be created by using the `@model` decorator. The decorator takes the model name and branch as the argument in the format `model_name:branch_name`.
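The function below logs its RMSE via `pureml.log`. The same call can also record the hyperparameter dictionary that drives the run, since `pureml.log` accepts a `params` argument (the multi-classification example later in this series uses it). A sketch using names from the function that follows:

```python
# Inside train_model(), after computing rmse (MODEL_PARAMS is defined there):
pureml.log(metrics={'RMSE': rmse}, params=MODEL_PARAMS)
```

Now the training function: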
+ + +```python +df = pureml.dataset.fetch('Regression:Example3:v1') +x_test = df['x_test'] +y_test = df['y_test'] +y_train = df['y_train'] +x_train = df['x_train'] +@model(label='Regression_example_1_model:development2') +def train_model(): + MODEL_PARAMS = { + 'booster': 'gbtree', + 'learning_rate': 0.11, + 'n_estimators': 77, + 'objective': 'reg:squarederror', + 'gamma': 1, + 'max_depth': 4, + 'reg_lambda': 1, + 'reg_alpha': 1, + 'subsample': 0.85, + 'colsample_bytree': 1, + 'min_child_weight': 2, + 'seed': 42 + } + xgbr = xgb.XGBRegressor(**MODEL_PARAMS) + xgbr.fit(x_train, y_train) + ypred2 = xgbr.predict(x_test) + rmse = np.sqrt(mean_squared_error(y_test, ypred2)) + pureml.log(metrics={'RMSE': rmse}) + print(f"RMSE: {rmse}") + return xgbr +train_model() +``` + + {" "} + The `pureml.log` function is used here to log the metrics and parameters of the + model.{" "} + + +Once ouur training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions + +## Let's Now create a predict.py file to store your prediction logic +```python +from pureml import BasePredictor,Input,Output +import pureml + +class Predictor(BasePredictor): + label = "Regression_example_1_model:development2:v1" + input = Input(type="pandas dataframe") + output = Output(type="numpy ndarray") + + def load_models(self): + self.model = pureml.model.fetch(self.label) + + def predict(self, data): + predictions = self.model.predict(data) + + return predictions + ``` + +## Add prediction to your model + +For registered models, prediction function along with its requirements and resources can be logged to be used for further processes like evaluating and packaging. + +PureML predict module has a method add. Here we are using the following arguments: + +- `label`: The name of the model (model_name:branch_name:version) +- `paths`: The path to the predict.py file and requirements.txt file. + +Our predict.py file has the script to load the model and make predictions. The requirements.txt file has the dependencies required to run the predict.py file. + + + {" "} + You can know more about the prediction process [here](../prediction/versioning){" "} + + +```python +import pureml + +pureml.predict.add(label='Regression_example_1_model:development2:v1',paths={'predict':'predict.py'}) +``` + +## Create your first Evaluation + +PureML has an eval method that runs a _task_type_ on a _label_model_ using a _label_dataset_. 
```python
import pureml
pureml.eval(task_type='regression',
            label_model='Regression_example_1_model:development2:v1',
            label_dataset='Regression:Example3:v1')
```

From b310cedc0c0098e900cf1c4d9f53bf67b8ae2c4e Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 12:05:44 +0530
Subject: [PATCH 04/17] Update Paris-House-Dataset.mdx

---
 examples/Paris-House-Dataset.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/Paris-House-Dataset.mdx b/examples/Paris-House-Dataset.mdx
index a052126..7d10545 100644
--- a/examples/Paris-House-Dataset.mdx
+++ b/examples/Paris-House-Dataset.mdx
@@ -83,7 +83,7 @@ def create_data():
 create_data()

 ```
- {" "} You can fetch the dataset using pureml.dataset.fetch() {""}
+ {" "} You can fetch the dataset using pureml.dataset.fetch() {" "}
 ```python

From be6815e52d93b0f8e154af55d8383fc70c8dcd91 Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 12:14:09 +0530
Subject: [PATCH 05/17] Create SVM-breast-cancer-dataset.mdx

---
 examples/SVM-breast-cancer-dataset.mdx | 178 +++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100644 examples/SVM-breast-cancer-dataset.mdx

diff --git a/examples/SVM-breast-cancer-dataset.mdx b/examples/SVM-breast-cancer-dataset.mdx
new file mode 100644
index 0000000..108a31c
--- /dev/null
+++ b/examples/SVM-breast-cancer-dataset.mdx
@@ -0,0 +1,178 @@
+---
title: SVM on Breast Cancer Dataset.
---

## Installation

Pureml SDK & CLI can be directly installed using pip.

```bash
pip install pureml
```

## For additional project requirements we will need to install the following packages

You can use the following command to install the packages.

```bash
pip install scikit-learn
```

OR

you can create a `requirements.txt` file with the following contents

```properties
scikit-learn
```

and run the following command

```bash
pip install -r requirements.txt
```

## Load your dataset

Start by creating a function to load the dataset. We will use the @load_data() decorator from PureML SDK.

```python
from sklearn.datasets import load_breast_cancer
from pureml.decorators import load_data,dataset,model
import pureml
from sklearn.model_selection import train_test_split

@load_data()
def load_dataset():
    cancer_data = load_breast_cancer()
    return cancer_data

load_dataset()
```

  {" "}If you need functions to pre-process the data then we can use the pureml.decorators.transformer() function{" "}


## Creating a dataset

We can now create a dataset from the pipeline. The dataset will be created by executing the pipeline and saving the output of the last transformer in the pipeline. The dataset can be created by using the `@dataset` decorator. The decorator takes the following arguments:

- `label`: The name of the dataset
- `upload`: If `True`, the dataset will be uploaded to the cloud. If `False`, the dataset will be saved locally.
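As a quick sanity check before building the dataset, you can inspect the scikit-learn `Bunch` object that the loader above returns:

```python
cancer_data = load_dataset()
print(cancer_data.data.shape)    # (569, 30): 569 samples, 30 numeric features
print(cancer_data.target_names)  # ['malignant' 'benign']
```

The dataset builder itself: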
```python
@dataset(label='cancerdataset:development',upload=True)
def create_dataset():
    cancer_data = load_dataset()
    x = cancer_data.data
    y = cancer_data.target
    x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=41)
    return {'x_train':x_train,'x_test':x_test,'y_train':y_train,'y_test':y_test}

create_dataset()
```
 {" "} You can fetch the dataset using pureml.dataset.fetch() {" "}

```python
df = pureml.dataset.fetch(label='cancerdataset:development:v1')
x_train = df['x_train']
x_test = df['x_test']
y_train = df['y_train']
y_test = df['y_test']
```

## Creating a model to classify the dataset

With the PureML model module, you can perform a variety of actions related to creating and managing models and branches.
PureML assists you with training and tracking all of your machine learning project information, including ML models and datasets, using semantic versioning and full artifact logging.

We can make a separate python file for the model. The model file will contain the model definition and the training code.
Let's start by adding the required imports.

```python
from sklearn.svm import SVC
```

The model training function can be created by using the `@model` decorator. The decorator takes the model name and branch as the argument in the format `model_name:branch_name`.


```python
df = pureml.dataset.fetch(label='cancerdataset:development:v1')
@model(label='cancer_svm:development')
def create_model():
    clf = SVC()
    clf.fit(x_train,y_train)
    y_pred = clf.predict(x_test)
    accuracy = clf.score(x_test,y_test)
    print(f"The accuracy score: {accuracy}")
    pureml.log(metrics={'accuracy':accuracy})
    return clf

create_model()
```

  {" "}
  The `pureml.log` function is used here to log the metrics and parameters of the
  model.{" "}


To Fetch the Model we can use pureml.model.fetch()

```python
import pureml
pureml.model.fetch(label='cancer_svm:development:v2')
```

Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions

## Let's Now create a predict.py file to store your prediction logic
```python
from pureml import BasePredictor, Input, Output
import pureml


class Predictor(BasePredictor):
    label = 'cancer_svm:development:v2'
    input = Input(type="numpy ndarray")
    output = Output(type="numpy ndarray")

    def load_models(self):
        self.model = pureml.model.fetch(self.label)

    def predict(self, data):
        predictions = self.model.predict(data)
        return predictions
```

## Add prediction to your model

For registered models, prediction function along with its requirements and resources can be logged to be used for further processes like evaluating and packaging.

PureML predict module has a method add. Here we are using the following arguments:

- `label`: The name of the model (model_name:branch_name:version)
- `paths`: The path to the predict.py file and requirements.txt file.

Our predict.py file has the script to load the model and make predictions. The requirements.txt file has the dependencies required to run the predict.py file.


  {" "}
  You can know more about the prediction process [here](../prediction/versioning){" "}


```python
import pureml

pureml.predict.add(label='cancer_svm:development:v2',paths={'predict': 'predict.py'})
```

## Create your first Evaluation

PureML has an eval method that runs a _task_type_ on a _label_model_ using a _label_dataset_.
```python
import pureml
pureml.eval(task_type='classification',
            label_model='cancer_svm:development:v2',
            label_dataset='cancerdataset:development:v1')
```

From 475c43933afaedcf9789f5126c51d45fb63a4984 Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 12:24:57 +0530
Subject: [PATCH 06/17] Create Multi-Classification-SVM

---
 examples/Multi-Classification-SVM | 161 ++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 examples/Multi-Classification-SVM

diff --git a/examples/Multi-Classification-SVM b/examples/Multi-Classification-SVM
new file mode 100644
index 0000000..893e824
--- /dev/null
+++ b/examples/Multi-Classification-SVM
@@ -0,0 +1,161 @@
+---
title: Multi Classification Using SVM
---

## Installation

Pureml SDK & CLI can be directly installed using pip.

```bash
pip install pureml
```

## For additional project requirements we will need to install the following packages

You can use the following command to install the packages.

```bash
pip install scikit-learn
```

OR

you can create a `requirements.txt` file with the following contents

```properties
scikit-learn
```

and run the following command

```bash
pip install -r requirements.txt
```

## Load your dataset

Start by creating a function to load the dataset. We will use the @load_data() decorator from PureML SDK.

```python
import pureml
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine
from pureml.decorators import load_data,dataset,model

@load_data()
def load_dataset():
    wine_data = load_wine()
    return wine_data

```

## Creating a dataset

We can now create a dataset from the pipeline. The dataset will be created by executing the pipeline and saving the output of the last transformer in the pipeline. The dataset can be created by using the `@dataset` decorator. The decorator takes the following arguments:

- `label`: The name of the dataset
- `upload`: If `True`, the dataset will be uploaded to the cloud. If `False`, the dataset will be saved locally.

```python
@dataset(label='multiclass_data:development',upload=True)
def create_dataset():
    wine_data = load_dataset()
    x = wine_data.data
    y = wine_data.target
    x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=42)
    return {'x_train':x_train,'x_test':x_test,'y_train':y_train,'y_test':y_test}

create_dataset()
```
You can fetch the loaded dataset using pureml.dataset.fetch()

```python
import pureml
df = pureml.dataset.fetch('multiclass_data:development:v1')
x_train = df['x_train']
x_test = df['x_test']
y_train = df['y_train']
y_test = df['y_test']
```

## Creating a model to classify the dataset

With the PureML model module, you can perform a variety of actions related to creating and managing models and branches.
PureML assists you with training and tracking all of your machine learning project information, including ML models and datasets, using semantic versioning and full artifact logging.

We can make a separate python file for the model. The model file will contain the model definition and the training code.
Let's start by adding the required imports.

```python
from sklearn.svm import SVC
```

The model training function can be created by using the `@model` decorator. The decorator takes the model name and branch as the argument in the format `model_name:branch_name`.
```python
@model(label='multiclassification:svm')
def create_model():
    clf = SVC(kernel='linear',C=1.0,random_state=42)
    clf.fit(x_train,y_train)
    y_pred = clf.predict(x_test)
    accuracy = clf.score(x_test,y_test)
    print(f"Accuracy : {accuracy}")
    pureml.log(metrics={'accuracy': accuracy},params={'kernel':'linear'})
    return clf

create_model()
```

Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions

## Let's Now create a predict.py file to store your prediction logic

```python
from pureml import BasePredictor, Input, Output
import pureml

class Predictor(BasePredictor):
    label = 'multiclassification:svm:v1'
    input = Input(type='numpy ndarray')
    output = Output(type='numpy ndarray')

    def load_models(self):
        self.model = pureml.model.fetch(self.label)

    def predict(self, data):
        predictions = self.model.predict(data)
        return predictions
```

## Add prediction to your model

For registered models, prediction function along with its requirements and resources can be logged to be used for further processes like evaluating and packaging.

PureML predict module has a method add. Here we are using the following arguments:

- `label`: The name of the model (model_name:branch_name:version)
- `paths`: The path to the predict.py file and requirements.txt file.

Our predict.py file has the script to load the model and make predictions. The requirements.txt file has the dependencies required to run the predict.py file.


  {" "}
  You can know more about the prediction process [here](../prediction/versioning){" "}


```python
import pureml
pureml.predict.add(label='multiclassification:svm:v1',paths={'predict':'predict.py'})
```

## Create your first Evaluation

PureML has an eval method that runs a _task_type_ on a _label_model_ using a _label_dataset_.
+ +```python +import pureml +pureml.eval(task_type='classification', + label_model='multiclassification:svm:v1', + label_dataset='multiclass_data:development:v1') +``` From 0b66131debae626bf5d63329ddce26465f470c57 Mon Sep 17 00:00:00 2001 From: Thrinadh Manubothu Date: Wed, 7 Jun 2023 12:25:37 +0530 Subject: [PATCH 07/17] Rename Multi-Classification-SVM to Multi-Classification-SVM.mdx --- .../{Multi-Classification-SVM => Multi-Classification-SVM.mdx} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/{Multi-Classification-SVM => Multi-Classification-SVM.mdx} (100%) diff --git a/examples/Multi-Classification-SVM b/examples/Multi-Classification-SVM.mdx similarity index 100% rename from examples/Multi-Classification-SVM rename to examples/Multi-Classification-SVM.mdx From 04e97bdc1cddba6062e5bcacaa3aa3a4f88fffd1 Mon Sep 17 00:00:00 2001 From: Thrinadh Manubothu Date: Wed, 7 Jun 2023 12:26:40 +0530 Subject: [PATCH 08/17] Update Multi-Classification-SVM.mdx --- examples/Multi-Classification-SVM.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/Multi-Classification-SVM.mdx b/examples/Multi-Classification-SVM.mdx index 893e824..9f9f9a8 100644 --- a/examples/Multi-Classification-SVM.mdx +++ b/examples/Multi-Classification-SVM.mdx @@ -46,7 +46,8 @@ from pureml.decorators import load_data,dataset,model def load_dataset(): wine_data = load_wine() return wine_data - + +load_dataset() ``` ## Creating a dataset From 325dea484cd37f26ab12d695394f2861953d2bd3 Mon Sep 17 00:00:00 2001 From: Thrinadh Manubothu Date: Wed, 7 Jun 2023 12:41:43 +0530 Subject: [PATCH 09/17] Create Classification-Drug-Dataset.mdx --- examples/Classification-Drug-Dataset.mdx | 215 +++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 examples/Classification-Drug-Dataset.mdx diff --git a/examples/Classification-Drug-Dataset.mdx b/examples/Classification-Drug-Dataset.mdx new file mode 100644 index 0000000..c7794bc --- /dev/null +++ b/examples/Classification-Drug-Dataset.mdx @@ -0,0 +1,215 @@ +--- +title: Classification on Drug Dataset +--- + +## Installation + +Pureml SDK & CLI can be directly installed using pip. + +```bash +pip install pureml +``` + +## For additional project requirements we will need to install the following packages + +You can use the following command to install the packages. + +```bash +pip install numpy==1.23.5 pandas==1.5.3 scikit-learn==1.2.2 +``` + +OR + +you can create a `requirements.txt` file with the following contents + +```properties +numpy==1.23.5 +pandas==1.5.3 +scikit-learn==1.2.2 +``` + +and run the following command + +```bash +pip install -r requirements.txt +``` + +## Download and load your dataset + +Download your dataset from [here](https://www.kaggle.com/code/amryasser22/drug-densitity/input). + +Start by creating a function to load the dataset into a DataFrame. We will use the @load_data() decorator from PureML SDK. + +```python +import numpy as np +import pandas as pd +import pureml +from pureml.decorators import model,dataset,load_data,transformer +from sklearn.model_selection import train_test_split +from sklearn.tree import DecisionTreeClassifier +from sklearn.metrics import accuracy_score + +@load_data() +def load_dataset(): + df = pd.read_csv('drug200.csv') + return df +load_dataset() +``` + +## Preprocess the data + + +We can add a few more functions to preprocess the data. We will use the @transformer() decorator from PureML SDK. 
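The transformers below hard-code an integer mapping for each categorical column via pandas' `replace`. Before relying on them, it is worth confirming that every category present in the CSV is covered, with a quick check like this:

```python
df = load_dataset()
for col in ['Sex', 'BP', 'Cholesterol', 'Drug']:
    print(col, df[col].unique())  # each value printed here must appear in a mapping below
```

With the categories confirmed, here are the transformers: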
```python
@transformer()
def convert_bp(df):
    df['BP'] = df['BP'].replace({'HIGH':2,'NORMAL':1,'LOW':0})
    return df

@transformer()
def convert_sex(df):
    df['Sex'] = df['Sex'].replace({'M':0,'F':1})
    return df

@transformer()
def convert_Cholesterol(df):
    df['Cholesterol'] = df['Cholesterol'].replace({'HIGH':1,'NORMAL':0})
    return df

@transformer()
def convert_Drug(df):
    df['Drug'] = df['Drug'].replace({'drugA': 0, 'drugB': 1, 'drugC': 2, 'drugX': 3, 'DrugY': 4})
    return df
```

## Creating a dataset

We can now create a dataset from the pipeline. The dataset will be created by executing the pipeline and saving the output of the last transformer in the pipeline. The dataset can be created by using the `@dataset` decorator. The decorator takes the following arguments:

- `label`: The name of the dataset
- `upload`: If `True`, the dataset will be uploaded to the cloud. If `False`, the dataset will be saved locally.

```python
@dataset(label='Classification:development',upload=True)
def create_dataset():
    df = load_dataset()
    df = convert_bp(df)
    df = convert_sex(df)
    df = convert_Cholesterol(df)
    df = convert_Drug(df)
    X = df.drop(columns = 'Drug')
    y = df['Drug']
    x_train,x_test,y_train,y_test = train_test_split(X,y)
    return {'x_train':x_train,'x_test':x_test,'y_train':y_train,'y_test':y_test}

create_dataset()
```

You can fetch the model with pureml.dataset.fetch()

```python
import pureml
df = pureml.dataset.fetch(label='Classification:development:v8')
x_train = df['x_train']
x_test = df['x_test']
y_train = df['y_train']
y_test = df['y_test']
```

## Creating a model to classify the dataset

With the PureML model module, you can perform a variety of actions related to creating and managing models and branches.
PureML assists you with training and tracking all of your machine learning project information, including ML models and datasets, using semantic versioning and full artifact logging.

We can make a separate python file for the model. The model file will contain the model definition and the training code.
Let's start by adding the required imports.

```python
import pureml
from pureml.decorators import model
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
```
The model training function can be created by using the `@model` decorator. The decorator takes the model name and branch as the argument in the format `model_name:branch_name`.


```python
df = pureml.dataset.fetch(label='Classification:development:v8')
x_train = df['x_train']
x_test = df['x_test']
y_train = df['y_train']
y_test = df['y_test']

@model(label='Classification_model:development')
def create_model():
    clf = DecisionTreeClassifier()
    clf.fit(x_train,y_train)
    y_pred = clf.predict(x_test)
    print(f'Accuracy : {accuracy_score(y_test,y_pred)}')
    pureml.log(metrics={'Accuracy Score' : accuracy_score(y_test,y_pred)})
    return clf
create_model()

```
You can fetch the model using pureml.model.fetch()

```python
import pureml
pureml.model.fetch(label='Classification_model:development:v1')
```

Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle.
So for now, let's just do some predictions + +## Let's Now create a predict.py file to store your prediction logic +```python +from pureml import BasePredictor,Input,Output +import pureml + + +class Predictor(BasePredictor): + label = 'Classification_model:development:v1' + input = Input(type = "pandas dataframe") + output = Output(type = "numpy ndarray") + + def load_models(self): + self.model = pureml.model.fetch(self.label) + + def predict(self, data): + predictions = self.model.predict(data) + + return predictions +``` + +## Add prediction to your model + +For registered models, prediction function along with its requirements and resources can be logged to be used for further processes like evaluating and packaging. + +PureML predict module has a method add. Here we are using the following arguments: + +- `label`: The name of the model (model_name:branch_name:version) +- `paths`: The path to the predict.py file and requirements.txt file. + +Our predict.py file has the script to load the model and make predictions. The requirements.txt file has the dependencies required to run the predict.py file. + + + {" "} + You can know more about the prediction process [here](../prediction/versioning){" "} + + +```python +import pureml + +pureml.predict.add(label='Classification_model:development:v1',paths= {'predict': 'predict.py'}) +``` + +## Create your first Evaluation + +PureML has an eval method that runs a _task_type_ on a _label_model_ using a _label_dataset_. + +```python +import pureml +pureml.eval(task_type='classification', + label_model='Classification_model:development:v1', + label_dataset='Classification:development:v8') +``` From 4eca9709280c15630e6ca6af47a7a8f22dccd0dd Mon Sep 17 00:00:00 2001 From: Thrinadh Manubothu Date: Wed, 7 Jun 2023 13:22:40 +0530 Subject: [PATCH 10/17] Create ImageClassification-SVM.mdx --- examples/ImageClassification-SVM.mdx | 215 +++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 examples/ImageClassification-SVM.mdx diff --git a/examples/ImageClassification-SVM.mdx b/examples/ImageClassification-SVM.mdx new file mode 100644 index 0000000..7b8324d --- /dev/null +++ b/examples/ImageClassification-SVM.mdx @@ -0,0 +1,215 @@ +--- +title: Image Classification Using SVM +--- + +## Installation + +Pureml SDK & CLI can be directly installed using pip. + +```bash +pip install pureml +``` + +## For additional project requirements we will need to install the following packages + +You can use the following command to install the packages. + +```bash +pip install numpy==1.23.5 pandas==1.5.3 Pillow==9.5.0 scikit-learn==1.2.2 + +``` + +OR + +you can create a `requirements.txt` file with the following contents + +```properties +pureml==0.3.8 +numpy==1.23.5 +pandas==1.5.3 +scikit-learn==1.2.2 +Pillow==9.5.0 +``` + +and run the following command + +```bash +pip install -r requirements.txt +``` + +## Load your dataset + +Start by creating a function to load the dataset into a DataFrame. We will use the @load_data() decorator from PureML SDK. + +```python +from sklearn.datasets import fetch_lfw_people +import pureml +from pureml.decorators import dataset,load_data,model,transformer +import pandas as pd +import numpy as np +from PIL import Image +from sklearn.model_selection import train_test_split + + +@load_data() +def load_dataset(): + face_data = fetch_lfw_people(min_faces_per_person=80) + return face_data + +face_data = load_dataset() +``` + +## Preprocess the data + +We can add a few more functions to preprocess the data. 
We will use the @transformer() decorator from PureML SDK. + + +```python +@transformer() +def split_data(face_data): + X = face_data.data + Y = face_data.target + x_train,x_test,y_train,y_test = train_test_split(X,Y,random_state = 42) + return x_train,x_test,y_train,y_test +``` + +## Creating a dataset + +We can now create a dataset from the pipeline. The dataset will be created by executing the pipeline and saving the output of the last transformer in the pipeline. The dataset can be created by using the `@dataset` decorator. The decorator takes the following arguments: + +- `label`: The name of the dataset +- `upload`: If `True`, the dataset will be uploaded to the cloud. If `False`, the dataset will be saved locally. + +```python +@dataset(label='Imageclassification:data',parent='split_data',upload=True) +def create_data(): + face_data = load_dataset() + x_train,x_test,y_train,y_test = split_data(face_data) + return {"x_train":x_train,"x_test":x_test,"y_train":y_train,"y_test":y_test} + +create_data() +``` + +## Visualization + +```python +import matplotlib.pyplot as plt + +fig, ax = plt.subplots(3, 4) +for i, axi in enumerate(ax.flat): + axi.imshow(face_data.images[i], cmap='bone') + axi.set(xticks=[], yticks=[],xlabel=face_data.target_names[face_data.target[i]]) +plt.show() +``` +To Fetch the dataset we can use `pureml.dataset.fetch()` +```python + +data = pureml.dataset.fetch(label='Imageclassification:data:v1') +x_train = data['x_train'] +x_test = data['x_test'] +y_train = data['y_train'] +y_test = data['y_test'] + +``` + +## Creating a model to classify the dataset + +With the PureML model module, you can perform a variety of actions related to creating and managing models and branches. +PureML assists you with training and tracking all of your machine learning project information, including ML models and datasets, using semantic versioning and full artifact logging. + +We can make a separate python file for the model. The model file will contain the model definition and the training code. +Let's start by adding the required imports. + +```python +from sklearn.model_selection import GridSearchCV +from sklearn.svm import SVC +``` + +The model training function can be created by using the `@model` decorator. The decorator takes the model name and branch as the argument in the format `model_name:branch_name`. + + +```python +data = pureml.dataset.fetch(label='Imageclassification:data:v1') +x_train = data['x_train'] +x_test = data['x_test'] +y_train = data['y_train'] +y_test = data['y_test'] + +@model(label='imageclassification:svm') +def model_creation(): + clf = SVC(class_weight='balanced', random_state=42) + parameters = {'C': [0.1, 1, 10],'gamma': [1e-07, 1e-08, 1e-06],'kernel' : ['rbf', 'linear'] } + grid_search = GridSearchCV(clf,parameters,n_jobs=-1,cv=5) + grid_search.fit(x_train,y_train) + pureml.log(metrics={'best_score':grid_search.best_score_},params={'parameters':grid_search.best_params_}) + return grid_search + +model_creation() +``` +To Fetch the Model we can use `pureml.model.fetch()` +```python +import pureml +pureml.model.fetch('imageclassification:svm:v4') +``` + +Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. 
So for now, let's just do some predictions

## Let's Now create a predict.py file to store prediction logic

```python
from pureml import BasePredictor, Input, Output
import pureml


class Predictor(BasePredictor):
    label = 'imageclassification:svm:v4'
    input = Input(type="numpy ndarray")
    output = Output(type="numpy ndarray")

    def load_models(self):
        self.model = pureml.model.fetch(self.label)

    def predict(self, data):
        prediction = self.model.predict(data)
        return prediction

```

## Add prediction to your model

For registered models, prediction function along with its requirements and resources can be logged to be used for further processes like evaluating and packaging.

PureML predict module has a method add. Here we are using the following arguments:

- `label`: The name of the model (model_name:branch_name:version)
- `paths`: The path to the predict.py file and requirements.txt file.

Our predict.py file has the script to load the model and make predictions. The requirements.txt file has the dependencies required to run the predict.py file.


  {" "}
  You can know more about the prediction process [here](../prediction/versioning){" "}


```python
import pureml

pureml.predict.add(label='imageclassification:svm:v4',paths={'predict':'predict.py'})
```
To Fetch the Predict file you can use `pureml.predict.fetch()`
```python
import pureml
pureml.predict.fetch(label='imageclassification:svm:v4')
```

## Create your first Evaluation

PureML has an eval method that runs a _task_type_ on a _label_model_ using a _label_dataset_.

```python
import pureml
pureml.eval(label_model='imageclassification:svm:v4',
            label_dataset='Imageclassification:data:v1',
            task_type='classification')
```

From f8c9cc3477fc79ca26d0c3d4e3d60a366ca1514b Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 13:25:06 +0530
Subject: [PATCH 11/17] Update Classification-Drug-Dataset.mdx

---
 examples/Classification-Drug-Dataset.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/Classification-Drug-Dataset.mdx b/examples/Classification-Drug-Dataset.mdx
index c7794bc..f19ce97 100644
--- a/examples/Classification-Drug-Dataset.mdx
+++ b/examples/Classification-Drug-Dataset.mdx
@@ -107,7 +107,7 @@ def create_dataset():
 create_dataset()
 ```
-You can fetch the model with pureml.dataset.fetch()
+You can fetch the dataset with `pureml.dataset.fetch()`
 ```python
 import pureml
@@ -152,7 +152,7 @@ def create_model():
 create_model()
 ```
-You can fetch the model using pureml.model.fetch()
+You can fetch the model using `pureml.model.fetch()`
 ```python
 import pureml
@@ -161,7 +161,7 @@ pureml.model.fetch(label='Classification_model:development:v1')

Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle.
So for now, let's just do some predictions
-## Let's Now create a predict.py file to store your prediction logic
+## Let's Now create a `predict.py` file to store your prediction logic
 ```python
 from pureml import BasePredictor,Input,Output
 import pureml

From 91f8693a9397d855c0f7e8e9b0ec199ad90aabee Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 13:25:47 +0530
Subject: [PATCH 12/17] Update ImageClassification-SVM.mdx

---
 examples/ImageClassification-SVM.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ImageClassification-SVM.mdx b/examples/ImageClassification-SVM.mdx
index 7b8324d..bfa67e5 100644
--- a/examples/ImageClassification-SVM.mdx
+++ b/examples/ImageClassification-SVM.mdx
@@ -154,7 +154,7 @@ pureml.model.fetch('imageclassification:svm:v4')

Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions

-## Let's Now create a predict.py file to store prediction logic
+## Let's Now create a `predict.py` file to store prediction logic

 ```python
 from pureml import BasePredictor, Input, Output

From a6d9ca2587eac35a2caf80ec84a6a19d9b4ea440 Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 13:28:19 +0530
Subject: [PATCH 13/17] Update Multi-Classification-SVM.mdx

---
 examples/Multi-Classification-SVM.mdx | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/examples/Multi-Classification-SVM.mdx b/examples/Multi-Classification-SVM.mdx
index 9f9f9a8..c991faf 100644
--- a/examples/Multi-Classification-SVM.mdx
+++ b/examples/Multi-Classification-SVM.mdx
@@ -15,7 +15,7 @@ pip install pureml
 You can use the following command to install the packages.

 ```bash
-pip install scikit-learn
+pip install scikit-learn==1.2.2
 ```

 OR
@@ -23,7 +23,7 @@ OR
 you can create a `requirements.txt` file with the following contents

 ```properties
-scikit-learn
+scikit-learn==1.2.2
 ```

 and run the following command
@@ -68,7 +68,7 @@ def create_dataset():

 create_dataset()
 ```
-You can fetch the loaded dataset using pureml.dataset.fetch()
+You can fetch the loaded dataset using `pureml.dataset.fetch()`

 ```python
 import pureml
@@ -107,10 +107,15 @@ def create_model():

 create_model()
 ```
+To Fetch the Model we can use `pureml.model.fetch()`
+```python
+import pureml
+pureml.model.fetch(label='multiclassification:svm:v1')
+```

Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions

-## Let's Now create a predict.py file to store your prediction logic
+## Let's Now create a `predict.py` file to store prediction logic

 ```python
 from pureml import BasePredictor, Input, Output

From dda1c341ab0435caecea6aa45c5cf6c64d953a9f Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 13:33:00 +0530
Subject: [PATCH 14/17] Update Paris-House-Dataset.mdx

---
 examples/Paris-House-Dataset.mdx | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/examples/Paris-House-Dataset.mdx b/examples/Paris-House-Dataset.mdx
index 7d10545..d4358fe 100644
--- a/examples/Paris-House-Dataset.mdx
+++ b/examples/Paris-House-Dataset.mdx
@@ -15,7 +15,7 @@ pip install pureml
 You can use the following command to install the packages.
```bash
-pip install torch scikit-learn scikit-image numpy opencv-python xgboost pandas pytorch_tabnet
+pip install torch scikit-learn==1.2.2 scikit-image numpy opencv-python xgboost pandas pytorch_tabnet
```

OR

@@ -25,7 +25,7 @@ you can create a `requirements.txt` file with the following contents

```properties
pytorch_tabnet
torch
-scikit-learn
+scikit-learn==1.2.2
scikit-image
numpy
opencv-python
xgboost
pandas
```

and run the following command
@@ -83,7 +83,7 @@ def create_data():
 create_data()

 ```
- {" "} You can fetch the dataset using pureml.dataset.fetch() {" "}
+To Fetch the dataset we can use `pureml.dataset.fetch()`
 ```python
 df = pureml.dataset.fetch('Regression:Example3:v1')
 x_test = df['x_test']
@@ -147,9 +147,18 @@ train_model()
   model.{" "}

-Once ouur training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions
-## Let's Now create a predict.py file to store your prediction logic
+
+To fetch the model we can use `pureml.model.fetch()`
+
+```python
+import pureml
+pureml.model.fetch(label='Regression_example_1_model:development2:v1')
+```
+
+Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions
+## Let's Now create a `predict.py` file to store your prediction logic
 ```python
 from pureml import BasePredictor,Input,Output
 import pureml

From 4d0f92cc3a83db008855bd15fcae25631ac2ad29 Mon Sep 17 00:00:00 2001
From: Thrinadh Manubothu
Date: Wed, 7 Jun 2023 13:35:15 +0530
Subject: [PATCH 15/17] Update SVM-breast-cancer-dataset.mdx

---
 examples/SVM-breast-cancer-dataset.mdx | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/SVM-breast-cancer-dataset.mdx b/examples/SVM-breast-cancer-dataset.mdx
index 108a31c..a625203 100644
--- a/examples/SVM-breast-cancer-dataset.mdx
+++ b/examples/SVM-breast-cancer-dataset.mdx
@@ -15,7 +15,8 @@ pip install pureml
 You can use the following command to install the packages.

 ```bash
-pip install scikit-learn
+pip install scikit-learn==1.2.2
+
 ```

 OR
@@ -23,7 +24,7 @@ OR
 you can create a `requirements.txt` file with the following contents

 ```properties
-scikit-learn
+scikit-learn==1.2.2
 ```

 and run the following command
@@ -71,7 +72,7 @@ def create_dataset():

 create_dataset()
 ```
- {" "} You can fetch the dataset using pureml.dataset.fetch() {" "}
+To fetch the dataset we can use `pureml.dataset.fetch()`

 ```python
 df = pureml.dataset.fetch(label='cancerdataset:development:v1')
@@ -116,7 +117,7 @@ create_model()
   model.{" "}

-To Fetch the Model we can use pureml.model.fetch()
+To Fetch the Model we can use `pureml.model.fetch()`

 ```python
 import pureml
@@ -125,7 +126,7 @@ pureml.model.fetch(label='cancer_svm:development:v2')

Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle.
So for now, let's just do some predictions -## Let's Now create a predict.py file to store your prediction logic +## Let's Now create a `predict.py` file to store prediction logic ```python from pureml import BasePredictor, Input, Output import pureml From 5f2f40d80b907c02934e64508648ea98bc3fd843 Mon Sep 17 00:00:00 2001 From: Thrinadh Manubothu Date: Wed, 7 Jun 2023 13:37:51 +0530 Subject: [PATCH 16/17] Update twitter-sentiment-analysis.mdx --- examples/twitter-sentiment-analysis.mdx | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/examples/twitter-sentiment-analysis.mdx b/examples/twitter-sentiment-analysis.mdx index 4a96848..98f4fa1 100644 --- a/examples/twitter-sentiment-analysis.mdx +++ b/examples/twitter-sentiment-analysis.mdx @@ -245,6 +245,12 @@ x_train = df['x_train'] y_train = df['y_train'] ``` +To Fetch the dataset we can use `pureml.dataset.fetch()` +```python +import pureml +pureml.dataset.fetch(label='nlpexample_docs:development:v1') +``` + ## Creating a model to classify the dataset With the PureML model module, you can perform a variety of actions related to creating and managing models and branches. @@ -307,10 +313,15 @@ def create_model(): create_model() ``` +To Fetch the model we can use 'pureml.model.fetch()' +```python +import pureml +pureml.model.fetch(label = 'nlpexample_docs:model:v1') +``` -Once ouur training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions +Once our training is complete our model will be ready to rock and roll🎸✨. But that's too much of a hassle. So for now, let's just do some predictions -## Let's Now create a predict.py file to store your prediction logic +## Let's Now create a `predict.py` file to store prediction logic ```python from pureml import BasePredictor,Input,Output import pureml From 3c87f368baa754c22d2f184b0603a547b90e774e Mon Sep 17 00:00:00 2001 From: Thrinadh Manubothu Date: Wed, 7 Jun 2023 13:41:06 +0530 Subject: [PATCH 17/17] Update twitter-sentiment-analysis.mdx --- examples/twitter-sentiment-analysis.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/twitter-sentiment-analysis.mdx b/examples/twitter-sentiment-analysis.mdx index 98f4fa1..0d512d3 100644 --- a/examples/twitter-sentiment-analysis.mdx +++ b/examples/twitter-sentiment-analysis.mdx @@ -246,6 +246,7 @@ y_train = df['y_train'] ``` To Fetch the dataset we can use `pureml.dataset.fetch()` + ```python import pureml pureml.dataset.fetch(label='nlpexample_docs:development:v1') @@ -313,7 +314,7 @@ def create_model(): create_model() ``` -To Fetch the model we can use 'pureml.model.fetch()' +To Fetch the model we can use `pureml.model.fetch()` ```python import pureml pureml.model.fetch(label = 'nlpexample_docs:model:v1')