MLOps doesn't have to be complicated! While enterprise MLOps involves complex pipelines, monitoring, and infrastructure, you can get started with basic model deployment using modern tools that make the process incredibly simple.
LitServe is the new kid on the block from Lightning AI that's gaining massive traction in the ML community. It promises 20x faster serving than traditional FastAPI setups and requires minimal code to get your models into production.
In this tutorial, we'll build a complete MLOps pipeline using:
Scikit-learn for model training
Joblib for model persistence
LitServe for high-performance model serving
Why LitServe Over FastAPI?
Before diving in, let's understand why LitServe is creating such buzz:
| Feature | FastAPI | LitServe |
| --- | --- | --- |
| Performance | Good | 20x faster (batching) |
| Setup Complexity | Medium | Minimal |
| Auto-batching | Manual | Built-in |
| GPU Support | Manual | Native |
| Streaming | Custom | Built-in |
| Load Balancing | External | Built-in |
LitServe handles the hard parts of ML serving automatically!
Prerequisites
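Make sure you have Python 3.8+ installed, then let's set up our environment by installing the packages this tutorial uses:
pip install litserve scikit-learn pandas numpy joblib requests pydantic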
Step 1: Train the Model
First, let's create a simple dataset and train a scikit-learn model:
# src/train_model.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import os
def generate_sample_data(n_samples=1000, seed=42):
    """Generate synthetic house price data for demonstration.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the random generator; the same seed always yields
            the same DataFrame.

    Returns:
        A pandas DataFrame with the feature columns ``size_sqft``,
        ``bedrooms``, ``age_years`` and ``location_score`` plus the target
        column ``price``.
    """
    # A private RandomState produces the same stream as calling
    # np.random.seed(seed) followed by the module-level samplers, but it
    # does not reseed the process-wide generator as a side effect.
    rng = np.random.RandomState(seed)

    # Features: size, bedrooms, age, location_score
    size = rng.normal(2000, 500, n_samples)
    bedrooms = rng.randint(1, 6, n_samples)
    age = rng.randint(0, 50, n_samples)
    location_score = rng.uniform(1, 10, n_samples)

    # Target: house price (simplified linear formula plus Gaussian noise)
    price = (
        size * 150
        + bedrooms * 10000
        + (50 - age) * 1000
        + location_score * 5000
        + rng.normal(0, 20000, n_samples)
    )

    return pd.DataFrame({
        'size_sqft': size,
        'bedrooms': bedrooms,
        'age_years': age,
        'location_score': location_score,
        'price': price,
    })
def train_model():
    """Train a house price prediction model and persist it to disk.

    Generates the sample data, writes it to ``data/sample_data.csv``, fits a
    ``StandardScaler`` and a ``RandomForestRegressor`` on an 80/20 train/test
    split, prints the evaluation metrics and dumps everything to
    ``models/trained_model.joblib``.

    Returns:
        dict with the keys ``model``, ``scaler``, ``feature_names`` and
        ``metrics`` (``mse``, ``r2``, ``rmse``) -- the same object that is
        written to the joblib file.
    """
    print("🏠 Generating sample house price data...")
    df = generate_sample_data()

    # Save sample data
    os.makedirs('data', exist_ok=True)
    df.to_csv('data/sample_data.csv', index=False)
    print(f"📊 Saved {len(df)} samples to data/sample_data.csv")

    # Prepare features and target
    X = df[['size_sqft', 'bedrooms', 'age_years', 'location_score']]
    y = df['price']

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Scale features; the scaler is fitted on the training split only so the
    # test split stays unseen.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train model
    print("🤖 Training Random Forest model...")
    model = RandomForestRegressor(
        n_estimators=100,
        max_depth=10,
        random_state=42,
        n_jobs=-1,
    )
    model.fit(X_train_scaled, y_train)

    # Evaluate model. Metrics are cast to plain floats so they serialize
    # to JSON without depending on NumPy scalar types.
    y_pred = model.predict(X_test_scaled)
    mse = float(mean_squared_error(y_test, y_pred))
    r2 = float(r2_score(y_test, y_pred))
    rmse = float(np.sqrt(mse))

    print("📈 Model Performance:")
    print(f" - MSE: {mse:,.0f}")
    print(f" - R² Score: {r2:.3f}")
    print(f" - RMSE: {rmse:,.0f}")

    # Save model and scaler together so serving applies the same scaling
    os.makedirs('models', exist_ok=True)
    model_data = {
        'model': model,
        'scaler': scaler,
        'feature_names': X.columns.tolist(),
        'metrics': {'mse': mse, 'r2': r2, 'rmse': rmse},
    }
    joblib.dump(model_data, 'models/trained_model.joblib')
    print("💾 Model saved to models/trained_model.joblib")

    return model_data
# Script entry point: train the model when this file is run directly.
if __name__ == "__main__":
    model_data = train_model()
    print("✅ Training completed successfully!")
Step 2: Create LitServe Model Server
Now let's create our high-performance model server using LitServe:
# src/serve_model.py
import litserve as ls
import joblib
import pandas as pd
import numpy as np
from typing import List, Dict, Any
class HousePricePredictionAPI(ls.LitAPI):
    """LitServe API for house price prediction.

    Each request is decoded into its own DataFrame (one row per house).
    When the server batches several requests together, ``batch`` keeps the
    frames separate, ``predict`` runs the model once over all rows and
    splits the result back per request, and ``unbatch`` hands one prediction
    array per request to ``encode_response``.
    """

    def setup(self, device):
        """Load the trained model, scaler and metadata from disk."""
        print("🔄 Loading trained model...")

        # NOTE: joblib.load unpickles the file, so only load model files
        # from a trusted source.
        self.model_data = joblib.load('models/trained_model.joblib')
        self.model = self.model_data['model']
        self.scaler = self.model_data['scaler']
        self.feature_names = self.model_data['feature_names']

        print("✅ Model loaded successfully!")
        print(f"📊 Features: {self.feature_names}")
        print(f"📈 Model R² Score: {self.model_data['metrics']['r2']:.3f}")

    def decode_request(self, request: Dict[str, Any]) -> pd.DataFrame:
        """Convert a request payload to a DataFrame for prediction.

        Accepted payload shapes:
            * ``{"features": {"size_sqft": 2000, ...}}`` -- single house
            * ``{"features": [{"size_sqft": 2000, ...}, ...]}`` -- several
            * ``{"size_sqft": 2000, "bedrooms": 3, ...}`` -- direct format

        Returns:
            DataFrame with one row per house and columns ordered like the
            training data.

        Raises:
            ValueError: if the payload has an unsupported shape, the
                feature list is empty, or a required feature is missing.
        """
        if not isinstance(request, dict):
            raise ValueError("Request body must be a JSON object")

        if 'features' in request:
            features = request['features']
            if isinstance(features, dict):
                df = pd.DataFrame([features])
            elif isinstance(features, list):
                # An empty list previously crashed with IndexError on
                # features[0]; report it as a validation error instead.
                if not features:
                    raise ValueError("Features list must not be empty")
                if not all(isinstance(item, dict) for item in features):
                    raise ValueError("Features must be a dict or list of dicts")
                df = pd.DataFrame(features)
            else:
                raise ValueError("Features must be a dict or list of dicts")
        else:
            df = pd.DataFrame([request])

        # Validate features
        missing_features = set(self.feature_names) - set(df.columns)
        if missing_features:
            raise ValueError(f"Missing features: {missing_features}")

        # Reorder columns to match training data
        return df[self.feature_names]

    def batch(self, inputs):
        """Keep the decoded per-request frames as a list.

        The frames can have different row counts, so they are not stacked
        here; ``predict`` concatenates them and remembers the boundaries.
        """
        return list(inputs)

    def predict(self, features_df: Any) -> Any:
        """Make predictions using the trained model.

        Args:
            features_df: a single DataFrame (un-batched request) or a list
                of DataFrames as returned by ``batch``.

        Returns:
            A NumPy array of predictions for a single DataFrame, or a list
            with one NumPy array per input frame for a batch.
        """
        if isinstance(features_df, pd.DataFrame):
            return self._predict_frame(features_df)

        frames = list(features_df)
        if not frames:
            return []

        # Run the model once over every row, then cut the flat result back
        # into one slice per request.
        combined = pd.concat(frames, ignore_index=True)
        predictions = self._predict_frame(combined)
        boundaries = np.cumsum([len(frame) for frame in frames])[:-1]
        return np.split(predictions, boundaries)

    def _predict_frame(self, frame: pd.DataFrame) -> np.ndarray:
        """Scale one DataFrame of features and run the model on it."""
        features_scaled = self.scaler.transform(frame)
        return self.model.predict(features_scaled)

    def unbatch(self, output):
        """Return one prediction array per request of the batch."""
        return list(output)

    def _model_info(self) -> Dict[str, Any]:
        """Return the model quality metrics included in every response."""
        metrics = self.model_data['metrics']
        return {"r2_score": metrics['r2'], "rmse": metrics['rmse']}

    def encode_response(self, predictions: np.ndarray) -> Dict[str, Any]:
        """Format the predictions of one request as a JSON-friendly dict.

        A request that produced exactly one prediction gets the flat
        ``prediction``/``formatted_price`` shape; otherwise the plural
        ``predictions``/``formatted_prices``/``count`` shape is returned.
        """
        # Convert to regular Python types for JSON serialization
        predictions_list = predictions.tolist()

        if len(predictions_list) == 1:
            # Single prediction
            return {
                "prediction": predictions_list[0],
                "formatted_price": f"${predictions_list[0]:,.0f}",
                "model_info": self._model_info(),
            }

        # Multiple predictions
        return {
            "predictions": predictions_list,
            "formatted_prices": [f"${p:,.0f}" for p in predictions_list],
            "count": len(predictions_list),
            "model_info": self._model_info(),
        }


def start_server(port: int = 8000):
    """Start the LitServe server.

    Args:
        port: TCP port to listen on (defaults to 8000).
    """
    # Create API instance
    api = HousePricePredictionAPI()

    # Create server with configuration
    server = ls.LitServer(
        api,
        accelerator="auto",   # Automatically detect GPU/CPU
        max_batch_size=32,    # Enable auto-batching
        batch_timeout=0.05,   # 50ms batch timeout
    )

    print("🚀 Starting LitServe server...")
    print(f"📡 Server will be available at: http://localhost:{port}")
    print(f"📚 API docs at: http://localhost:{port}/docs")

    # Start server
    server.run(port=port, num_api_servers=1)


if __name__ == "__main__":
    start_server()
Step 3: Test the API
Let's create a test script to verify our API works correctly:
# src/test_api.py
import requests
import json
import time
# Shared endpoint settings for every check in this script.
BASE_URL = "http://localhost:8000"
PREDICT_URL = f"{BASE_URL}/predict"
REQUEST_TIMEOUT = 10  # seconds; keeps the script from hanging on a dead server


def test_single_prediction():
    """Send one house in the direct format and print the predicted price."""
    print("🏠 Testing single house prediction...")

    # Sample house data
    house_data = {
        "size_sqft": 2500,
        "bedrooms": 4,
        "age_years": 5,
        "location_score": 8.5,
    }

    try:
        response = requests.post(PREDICT_URL, json=house_data, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        result = response.json()

        print("✅ Prediction successful!")
        print(f" Predicted Price: {result['formatted_price']}")
        print(f" Model R² Score: {result['model_info']['r2_score']:.3f}")
    except requests.exceptions.RequestException as e:
        print(f"❌ Error: {e}")


def test_batch_prediction():
    """Send three houses in one request and print each predicted price."""
    print("\n🏘️ Testing batch predictions...")

    # Multiple houses
    houses_data = {
        "features": [
            {"size_sqft": 1500, "bedrooms": 2, "age_years": 10, "location_score": 6.0},
            {"size_sqft": 3000, "bedrooms": 5, "age_years": 1, "location_score": 9.0},
            {"size_sqft": 2000, "bedrooms": 3, "age_years": 15, "location_score": 7.5},
        ]
    }

    try:
        start_time = time.time()
        response = requests.post(PREDICT_URL, json=houses_data, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        end_time = time.time()

        result = response.json()
        print("✅ Batch prediction successful!")
        print(f" Houses processed: {result['count']}")
        print(f" Processing time: {(end_time - start_time)*1000:.1f}ms")

        for i, price in enumerate(result['formatted_prices']):
            print(f" House {i+1}: {price}")
    except requests.exceptions.RequestException as e:
        print(f"❌ Error: {e}")


def test_api_info():
    """Query the health endpoint and print where the API docs live."""
    print("\n📊 Testing API info...")

    try:
        # Health check
        response = requests.get(f"{BASE_URL}/health", timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            print("✅ Server is healthy")
        else:
            # Previously a non-200 status was silently ignored.
            print(f"❌ Health check returned status {response.status_code}")

        # API documentation
        print(f"📚 API documentation available at: {BASE_URL}/docs")
    except requests.exceptions.RequestException as e:
        print(f"❌ Health check failed: {e}")


def benchmark_api(num_requests=100):
    """Run a simple sequential benchmark against the predict endpoint.

    Args:
        num_requests: Number of POST requests to send one after another.

    Prints the total and successful request counts, the elapsed time, the
    throughput and the average latency. Failed requests are counted as
    unsuccessful instead of aborting the run.
    """
    print(f"\n⚡ Benchmarking API with {num_requests} requests...")

    house_data = {
        "size_sqft": 2000,
        "bedrooms": 3,
        "age_years": 10,
        "location_score": 7.0,
    }

    start_time = time.time()
    successful_requests = 0

    for _ in range(num_requests):
        try:
            response = requests.post(PREDICT_URL, json=house_data, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            # Only network/HTTP errors count as failed requests; a bare
            # except here would also swallow Ctrl-C and programming errors.
            continue
        if response.status_code == 200:
            successful_requests += 1

    total_time = time.time() - start_time

    print("📈 Benchmark Results:")
    print(f" Total requests: {num_requests}")
    print(f" Successful: {successful_requests}")
    print(f" Total time: {total_time:.2f}s")

    # Guard both ratios: with the server down nothing succeeds, and the
    # average-latency division used to raise ZeroDivisionError.
    if total_time > 0:
        print(f" Requests/second: {successful_requests/total_time:.1f}")
    else:
        print(" Requests/second: n/a")
    if successful_requests > 0:
        print(f" Average latency: {(total_time/successful_requests)*1000:.1f}ms")
    else:
        print(" Average latency: n/a")


if __name__ == "__main__":
    print("🧪 Testing LitServe House Price Prediction API")
    print("=" * 50)

    # Run tests
    test_single_prediction()
    test_batch_prediction()
    test_api_info()
    benchmark_api(50)  # Light benchmark

    print("\n✅ All tests completed!")
Step 4: Requirements File
Create a requirements.txt file for easy environment setup:
litserve
scikit-learn
pandas
numpy
joblib
requests
pydantic
Expected output when running python src/train_model.py:
🏠 Generating sample house price data...
📊 Saved 1000 samples to data/sample_data.csv
🤖 Training Random Forest model...
📈 Model Performance:
 - MSE: 394,856,789
 - R² Score: 0.892
 - RMSE: 19,871
💾 Model saved to models/trained_model.joblib
✅ Training completed successfully!
Starting the Server
# Start the LitServe server
python src/serve_model.py
Expected output:
🔄 Loading trained model...
✅ Model loaded successfully!
📊 Features: ['size_sqft', 'bedrooms', 'age_years', 'location_score']
📈 Model R² Score: 0.892
🚀 Starting LitServe server...
📡 Server will be available at: http://localhost:8000
📚 API docs at: http://localhost:8000/docs
Testing the API
In another terminal:
# Test the API
python src/test_api.py
Advanced Features
Adding Model Versioning
# Enhanced model saving with versioning
import datetime
def save_model_with_version(model_data):
    """Save the model under a timestamped file name and as the latest model.

    Args:
        model_data: dict to persist. It is updated in place with the keys
            ``version`` (``YYYYMMDD_HHMMSS``) and ``created_at`` (ISO 8601).

    Returns:
        Path of the versioned file, ``models/model_v<timestamp>.joblib``.
    """
    # Function-scope import: the import line that accompanies this snippet
    # only brings in datetime.
    import os

    # Take a single timestamp so ``version`` and ``created_at`` always
    # describe the same instant.
    now = datetime.datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    version_path = f'models/model_v{timestamp}.joblib'

    # Add version info
    model_data['version'] = timestamp
    model_data['created_at'] = now.isoformat()

    # Make sure the target directory exists before writing into it
    os.makedirs('models', exist_ok=True)
    joblib.dump(model_data, version_path)

    # Also save as latest
    joblib.dump(model_data, 'models/trained_model.joblib')

    return version_path
Adding Input Validation
# Enhanced request validation
from pydantic import BaseModel, Field
from typing import Optional
# typing.List keeps the batch model importable on Python 3.8, where the
# builtin ``list[...]`` generic is not subscriptable.
from typing import List


class HouseFeatures(BaseModel):
    """Validated feature set describing a single house."""

    size_sqft: float = Field(..., gt=0, le=10000, description="House size in square feet")
    bedrooms: int = Field(..., ge=1, le=10, description="Number of bedrooms")
    age_years: float = Field(..., ge=0, le=100, description="Age of house in years")
    location_score: float = Field(..., ge=1, le=10, description="Location quality score 1-10")


class PredictionRequest(BaseModel):
    """Request body carrying the features of one house."""

    features: HouseFeatures


class BatchPredictionRequest(BaseModel):
    """Request body carrying the features of several houses."""

    features: List[HouseFeatures]
Add Advanced Monitoring: Prometheus, Grafana, or MLflow
LitServe makes ML serving incredibly simple while maintaining production-grade performance. It's the perfect tool for getting your models into production quickly without sacrificing speed or reliability!