AI and machine learning are transforming mobile apps. This guide covers practical approaches to integrating AI/ML features into iOS and Android applications.
AI/ML Integration Options
Integration Approaches:
├── Cloud AI APIs
│ ├── Easy to implement
│ ├── Requires internet
│ ├── Pay-per-use pricing
│ └── Always up-to-date models
├── On-Device ML
│ ├── Works offline
│ ├── Better privacy
│ ├── Lower latency
│ └── App size increases
└── Hybrid Approach
├── On-device for common tasks
├── Cloud for complex tasks
└── Best of both worlds
On-Device Machine Learning
iOS: Core ML
Core ML is Apple's framework for running ML models on-device on iPhone and iPad, automatically dispatching work to the CPU, GPU, or Neural Engine.
import CoreML
import Vision
// Load a Core ML model
class ImageClassifier {
private let model: VNCoreMLModel
init() throws {
let config = MLModelConfiguration()
let mlModel = try MobileNetV2(configuration: config)
model = try VNCoreMLModel(for: mlModel.model)
}
func classify(image: UIImage, completion: @escaping ([String: Double]) -> Void) {
guard let ciImage = CIImage(image: image) else { return }
let request = VNCoreMLRequest(model: model) { request, error in
guard let results = request.results as? [VNClassificationObservation] else {
return
}
let classifications = results.prefix(5).reduce(into: [String: Double]()) { dict, obs in
dict[obs.identifier] = Double(obs.confidence)
}
DispatchQueue.main.async {
completion(classifications)
}
}
let handler = VNImageRequestHandler(ciImage: ciImage)
try? handler.perform([request])
}
}
// Usage
let classifier = try ImageClassifier()
classifier.classify(image: photo) { results in
// The results dictionary is unordered, so pick the highest-confidence entry explicitly
let top = results.max { $0.value < $1.value }
print("Top prediction: \(top?.key ?? "Unknown")")
}
Android: TensorFlow Lite
TensorFlow Lite enables on-device ML for Android.
import android.content.Context
import android.graphics.Bitmap
import org.tensorflow.lite.Interpreter
import java.nio.ByteBuffer
import java.nio.ByteOrder
class ImageClassifier(context: Context) {
private val interpreter: Interpreter
private val labels: List<String>
init {
val model = loadModelFile(context, "mobilenet_v2.tflite")
interpreter = Interpreter(model)
labels = loadLabels(context, "labels.txt")
}
private fun loadModelFile(context: Context, filename: String): ByteBuffer {
val assetManager = context.assets
val inputStream = assetManager.open(filename)
val byteArray = inputStream.readBytes()
// The Interpreter expects a direct buffer in native byte order
return ByteBuffer.allocateDirect(byteArray.size).apply {
order(ByteOrder.nativeOrder())
put(byteArray)
rewind()
}
}
private fun loadLabels(context: Context, filename: String): List<String> {
return context.assets.open(filename).bufferedReader().readLines()
}
fun classify(bitmap: Bitmap): List<Pair<String, Float>> {
val input = preprocessImage(bitmap)
val output = Array(1) { FloatArray(labels.size) }
interpreter.run(input, output)
return output[0]
.mapIndexed { index, confidence -> labels[index] to confidence }
.sortedByDescending { it.second }
.take(5)
}
private fun preprocessImage(bitmap: Bitmap): ByteBuffer {
val resized = Bitmap.createScaledBitmap(bitmap, 224, 224, true)
val buffer = ByteBuffer.allocateDirect(224 * 224 * 3 * 4).order(ByteOrder.nativeOrder())
for (y in 0 until 224) {
for (x in 0 until 224) {
val pixel = resized.getPixel(x, y)
buffer.putFloat(((pixel shr 16) and 0xFF) / 255f)
buffer.putFloat(((pixel shr 8) and 0xFF) / 255f)
buffer.putFloat((pixel and 0xFF) / 255f)
}
}
buffer.rewind()
return buffer
}
}
Cross-Platform: React Native with TensorFlow.js
TensorFlow.js, paired with the tfjs-react-native adapter, lets React Native apps run models on-device from JavaScript.
import * as tf from '@tensorflow/tfjs';
import { bundleResourceIO } from '@tensorflow/tfjs-react-native';
import * as mobilenet from '@tensorflow-models/mobilenet';
// Initialize TensorFlow
async function initTensorFlow() {
await tf.ready();
console.log('TensorFlow.js ready');
}
// Load and use MobileNet (load the model once and cache it)
let mobilenetModel = null;
async function classifyImage(imageTensor) {
if (!mobilenetModel) {
mobilenetModel = await mobilenet.load();
}
const predictions = await mobilenetModel.classify(imageTensor);
return predictions.map(p => ({
label: p.className,
confidence: p.probability
}));
}
// Convert image to tensor
function imageToTensor(rawImageData, width, height) {
const imageData = new Uint8Array(rawImageData);
return tf.tidy(() => {
const tensor = tf.tensor3d(imageData, [height, width, 4], 'int32');
// Remove the alpha channel; mobilenet.classify expects raw 0-255 pixel values
// and normalizes internally, so no division by 255 here
return tensor.slice([0, 0, 0], [-1, -1, 3]);
});
}
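A brief usage sketch tying these helpers together; the function name and the raw RGBA input (e.g. a decoded camera frame) are assumptions for illustration:
async function runClassification(rawImageData, width, height) {
  await initTensorFlow();
  const imageTensor = imageToTensor(rawImageData, width, height);
  const predictions = await classifyImage(imageTensor);
  imageTensor.dispose(); // release the tensor's memory when done
  return predictions; // e.g. [{ label: 'tabby cat', confidence: 0.87 }, ...]
}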
Cloud AI APIs
Google Cloud AI
// Vision API - Image Analysis
async function analyzeImage(imageBase64) {
const response = await fetch(
`https://vision.googleapis.com/v1/images:annotate?key=${API_KEY}`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
requests: [{
image: { content: imageBase64 },
features: [
{ type: 'LABEL_DETECTION', maxResults: 10 },
{ type: 'FACE_DETECTION', maxResults: 5 },
{ type: 'TEXT_DETECTION' },
{ type: 'SAFE_SEARCH_DETECTION' }
]
}]
})
}
);
return response.json();
}
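The Vision API nests results per request; a small helper (hypothetical name) shows how the label annotations come back:
async function getImageLabels(imageBase64) {
  const result = await analyzeImage(imageBase64);
  const labels = result.responses?.[0]?.labelAnnotations || [];
  return labels.map(label => ({
    description: label.description, // e.g. 'Dog'
    score: label.score              // confidence from 0 to 1
  }));
}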
// Natural Language API - Sentiment Analysis
async function analyzeSentiment(text) {
const response = await fetch(
`https://language.googleapis.com/v1/documents:analyzeSentiment?key=${API_KEY}`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
document: {
type: 'PLAIN_TEXT',
content: text
},
encodingType: 'UTF8'
})
}
);
const result = await response.json();
return {
score: result.documentSentiment.score, // -1 to 1
magnitude: result.documentSentiment.magnitude
};
}
AWS AI Services
import AWS from 'aws-sdk';
// Configure AWS
AWS.config.update({
region: 'us-east-1',
credentials: new AWS.CognitoIdentityCredentials({
IdentityPoolId: 'us-east-1:xxx-xxx-xxx'
})
});
// Rekognition - Object Detection
const rekognition = new AWS.Rekognition();
async function detectObjects(imageBytes) {
const params = {
Image: { Bytes: imageBytes },
MaxLabels: 10,
MinConfidence: 70
};
const result = await rekognition.detectLabels(params).promise();
return result.Labels.map(label => ({
name: label.Name,
confidence: label.Confidence,
instances: label.Instances?.length || 0
}));
}
// Comprehend - Text Analysis
const comprehend = new AWS.Comprehend();
async function analyzeText(text) {
const [sentiment, entities, keyPhrases] = await Promise.all([
comprehend.detectSentiment({
Text: text,
LanguageCode: 'en'
}).promise(),
comprehend.detectEntities({
Text: text,
LanguageCode: 'en'
}).promise(),
comprehend.detectKeyPhrases({
Text: text,
LanguageCode: 'en'
}).promise()
]);
return { sentiment, entities, keyPhrases };
}
// Transcribe - Speech to Text
const transcribe = new AWS.TranscribeService();
async function startTranscription(audioUri) {
const jobName = `transcription-${Date.now()}`;
await transcribe.startTranscriptionJob({
TranscriptionJobName: jobName,
LanguageCode: 'en-US',
MediaFormat: 'm4a',
Media: { MediaFileUri: audioUri }
}).promise();
return jobName;
}
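startTranscriptionJob only starts an asynchronous job, so the transcript has to be fetched once the job finishes. A minimal polling sketch (the helper name and 5-second interval are arbitrary choices):
async function waitForTranscript(jobName) {
  while (true) {
    const { TranscriptionJob } = await transcribe
      .getTranscriptionJob({ TranscriptionJobName: jobName })
      .promise();
    if (TranscriptionJob.TranscriptionJobStatus === 'COMPLETED') {
      return TranscriptionJob.Transcript.TranscriptFileUri; // URI of the transcript JSON
    }
    if (TranscriptionJob.TranscriptionJobStatus === 'FAILED') {
      throw new Error(TranscriptionJob.FailureReason);
    }
    await new Promise(resolve => setTimeout(resolve, 5000)); // check again in 5 seconds
  }
}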
OpenAI API Integration
// Chat Completion (GPT-4)
async function chat(messages, options = {}) {
const response = await fetch('https://api.openai.com/v1/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${OPENAI_API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: options.model || 'gpt-4',
messages: messages,
temperature: options.temperature ?? 0.7,
max_tokens: options.maxTokens ?? 1000
})
});
const result = await response.json();
return result.choices[0].message.content;
}
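For reference, a minimal call showing the expected messages format (role/content pairs); the prompts and helper name are placeholders:
async function summarizeReview(reviewText) {
  return chat([
    { role: 'system', content: 'You summarize app reviews in one sentence.' },
    { role: 'user', content: reviewText }
  ], { temperature: 0.3 });
}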
// Image Generation (DALL-E)
async function generateImage(prompt) {
const response = await fetch('https://api.openai.com/v1/images/generations', {
method: 'POST',
headers: {
'Authorization': `Bearer ${OPENAI_API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: 'dall-e-3',
prompt: prompt,
n: 1,
size: '1024x1024'
})
});
const result = await response.json();
return result.data[0].url;
}
// Whisper - Speech to Text
async function transcribeAudio(audioBlob) {
const formData = new FormData();
formData.append('file', audioBlob, 'audio.m4a');
formData.append('model', 'whisper-1');
const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${OPENAI_API_KEY}`
},
body: formData
});
const result = await response.json();
return result.text;
}
Common AI/ML Features
1. Image Classification
Use Cases:
├── Product recognition
├── Plant/animal identification
├── Document classification
├── Content moderation
└── Medical imaging
Implementation Options:
├── Pre-trained: MobileNet, ResNet, EfficientNet
├── Custom: Train on your dataset
├── Cloud: Google Vision, AWS Rekognition
└── Hybrid: On-device + cloud fallback
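The hybrid option can be as simple as keeping the on-device answer when it is confident and calling the cloud otherwise. A sketch reusing classifyImage (TensorFlow.js, above) and analyzeImage (Google Vision, above); the 0.6 confidence threshold is an arbitrary assumption:
async function classifyHybrid(imageTensor, imageBase64) {
  // 1. Try the on-device model first: free, fast, works offline
  const local = await classifyImage(imageTensor);
  if (local.length > 0 && local[0].confidence >= 0.6) {
    return { source: 'on-device', predictions: local };
  }
  // 2. Low confidence or no result: fall back to the cloud API
  const cloud = await analyzeImage(imageBase64);
  const labels = cloud.responses?.[0]?.labelAnnotations || [];
  return {
    source: 'cloud',
    predictions: labels.map(l => ({ label: l.description, confidence: l.score }))
  };
}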
2. Object Detection
// iOS Vision: built-in animal recognition (VNRecognizeAnimalsRequest detects cats and dogs);
// for arbitrary object classes, pair VNCoreMLRequest with a detection model such as YOLO
import Vision
func detectObjects(in image: UIImage) {
guard let cgImage = image.cgImage else { return }
let request = VNRecognizeAnimalsRequest { request, error in
guard let results = request.results as? [VNRecognizedObjectObservation] else {
return
}
for observation in results {
print("Object: \(observation.labels.first?.identifier ?? "Unknown")")
print("Confidence: \(observation.confidence)")
print("Bounding box: \(observation.boundingBox)")
}
}
let handler = VNImageRequestHandler(cgImage: cgImage)
try? handler.perform([request])
}
3. Natural Language Processing
// Text Classification with ML Kit
import { TextClassifier } from '@react-native-ml-kit/text-classification';
async function classifyText(text) {
const classifier = await TextClassifier.createFromAsset('text_classifier.tflite');
const results = await classifier.classify(text);
return results.map(r => ({
category: r.label,
confidence: r.score
}));
}
// Named Entity Recognition
async function extractEntities(text) {
const response = await fetch(`${NLP_API_URL}/entities`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text })
});
const entities = await response.json();
// Returns: [{ text: "Apple", type: "ORGANIZATION" }, ...]
return entities;
}
4. Speech Recognition
// iOS Speech Recognition
// Requires NSSpeechRecognitionUsageDescription and NSMicrophoneUsageDescription in
// Info.plist, plus a call to SFSpeechRecognizer.requestAuthorization before first use
import Speech
import AVFoundation
class SpeechRecognizer {
private let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
private var recognitionTask: SFSpeechRecognitionTask?
private let audioEngine = AVAudioEngine()
func startRecording(onResult: @escaping (String) -> Void) throws {
let request = SFSpeechAudioBufferRecognitionRequest()
request.shouldReportPartialResults = true
let inputNode = audioEngine.inputNode
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
request.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
recognitionTask = recognizer?.recognitionTask(with: request) { result, error in
if let result = result {
onResult(result.bestTranscription.formattedString)
}
}
}
func stopRecording() {
audioEngine.stop()
audioEngine.inputNode.removeTap(onBus: 0)
recognitionTask?.cancel()
}
}
5. Recommendation Systems
// Collaborative Filtering Recommendations (calls a backend recommendation service)
import { Platform } from 'react-native';
class RecommendationEngine {
constructor(apiUrl) {
this.apiUrl = apiUrl;
}
async getRecommendations(userId, options = {}) {
const response = await fetch(`${this.apiUrl}/recommendations`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
userId,
limit: options.limit || 10,
excludeViewed: options.excludeViewed ?? true,
context: {
location: options.location,
timeOfDay: new Date().getHours(),
device: Platform.OS
}
})
});
return response.json();
}
async recordInteraction(userId, itemId, interactionType) {
await fetch(`${this.apiUrl}/interactions`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
userId,
itemId,
type: interactionType, // 'view', 'click', 'purchase', 'like'
timestamp: new Date().toISOString()
})
});
}
}
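A brief usage sketch; the API URL and IDs are placeholders, and the response shape depends on your backend:
async function showRecommendations() {
  const engine = new RecommendationEngine('https://api.example.com');
  await engine.recordInteraction('user-123', 'item-456', 'view');
  const recommendations = await engine.getRecommendations('user-123', { limit: 5 });
  return recommendations;
}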
6. Face Detection and Recognition
// Android ML Kit Face Detection
import android.graphics.Bitmap
import android.graphics.Rect
import android.util.Log
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.face.FaceDetection
import com.google.mlkit.vision.face.FaceDetectorOptions
class FaceDetector {
private val options = FaceDetectorOptions.Builder()
.setPerformanceMode(FaceDetectorOptions.PERFORMANCE_MODE_FAST)
.setLandmarkMode(FaceDetectorOptions.LANDMARK_MODE_ALL)
.setClassificationMode(FaceDetectorOptions.CLASSIFICATION_MODE_ALL)
.build()
private val detector = FaceDetection.getClient(options)
fun detectFaces(bitmap: Bitmap, onResult: (List<FaceData>) -> Unit) {
val image = InputImage.fromBitmap(bitmap, 0)
detector.process(image)
.addOnSuccessListener { faces ->
val faceDataList = faces.map { face ->
FaceData(
boundingBox = face.boundingBox,
smilingProbability = face.smilingProbability,
leftEyeOpenProbability = face.leftEyeOpenProbability,
rightEyeOpenProbability = face.rightEyeOpenProbability,
headRotationY = face.headEulerAngleY,
headRotationZ = face.headEulerAngleZ
)
}
onResult(faceDataList)
}
.addOnFailureListener { e ->
Log.e("FaceDetector", "Detection failed", e)
}
}
}
data class FaceData(
val boundingBox: Rect,
val smilingProbability: Float?,
val leftEyeOpenProbability: Float?,
val rightEyeOpenProbability: Float?,
val headRotationY: Float,
val headRotationZ: Float
)
Model Optimization for Mobile
Model Size Reduction
Optimization Techniques:
├── Quantization: Float32 → Int8 (4x smaller)
├── Pruning: Remove unnecessary weights
├── Knowledge Distillation: Train smaller model
├── Architecture Search: Find efficient models
└── Weight Sharing: Reduce unique parameters
Size Comparison:
├── MobileNetV2 Full: 14 MB
├── MobileNetV2 Quantized: 3.4 MB
├── EfficientNet-B0: 21 MB
└── EfficientNet-B0 Quantized: 5.3 MB
TensorFlow Lite Optimization
# Convert and optimize TensorFlow model
import tensorflow as tf
# Load your model
model = tf.keras.models.load_model('my_model.h5')
# Create converter
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Apply optimizations
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Quantization (optional, for further size reduction)
converter.target_spec.supported_types = [tf.float16]
# Convert
tflite_model = converter.convert()
# Save
with open('model.tflite', 'wb') as f:
f.write(tflite_model)
Core ML Optimization
import coremltools as ct
# Load the PyTorch model and trace it to TorchScript
# (coremltools converts traced/scripted models, not raw nn.Module objects)
import torch
model = torch.load('model.pt')
model.eval()
example_input = torch.rand(1, 3, 224, 224)
traced_model = torch.jit.trace(model, example_input)
# Convert to Core ML (ML Program format)
mlmodel = ct.convert(
traced_model,
inputs=[ct.TensorType(shape=(1, 3, 224, 224))],
compute_precision=ct.precision.FLOAT16, # Use FP16
compute_units=ct.ComputeUnit.ALL # Allow CPU, GPU, and Neural Engine
)
# Optionally quantize weights to Int8 (ML Program models, coremltools 7+)
import coremltools.optimize.coreml as cto
config = cto.OptimizationConfig(global_config=cto.OpLinearQuantizerConfig(mode="linear_symmetric"))
mlmodel_quantized = cto.linear_quantize_weights(mlmodel, config)
mlmodel_quantized.save('model.mlpackage')
Privacy Considerations
Best Practices:
├── Process data on-device when possible
├── Don't send raw images/audio to cloud
├── Use federated learning for model updates
├── Encrypt data in transit and at rest
├── Get explicit user consent
├── Provide opt-out options
├── Minimize data collection
└── Delete data after processing
Privacy-Preserving Techniques:
├── On-device processing
├── Differential privacy (see the sketch after this list)
├── Federated learning
├── Secure enclaves
└── Homomorphic encryption
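Of these, differential privacy is the simplest to sketch: add calibrated noise to aggregate values before they leave the device, so individual users cannot be singled out. A toy local-DP example in JavaScript (the epsilon value is illustrative, not a recommendation):
// Sample Laplace(0, scale) noise via inverse-transform sampling
function laplaceNoise(scale) {
  const u = Math.random() - 0.5;
  return -scale * Math.sign(u) * Math.log(1 - 2 * Math.abs(u));
}

// Report a usage count with epsilon-differential privacy
// (sensitivity of a count is 1; smaller epsilon = more noise = more privacy)
function privatizeCount(trueCount, epsilon = 1.0) {
  return trueCount + laplaceNoise(1 / epsilon);
}

// e.g. report roughly how often an AI feature was used, without the exact number
const reportedUsage = privatizeCount(42, 0.5);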
Cost Considerations
Cloud AI Pricing
Google Cloud Vision API:
├── First 1,000 images/month: Free
├── 1,001 - 5M images: $1.50 per 1,000
└── 5M+ images: $0.60 per 1,000
AWS Rekognition:
├── First 5,000 images/month: Free tier
├── 5,001 - 1M images: $1.00 per 1,000
└── 1M+ images: $0.80 per 1,000
OpenAI API:
├── GPT-4: $30/1M input + $60/1M output tokens
├── GPT-3.5: $0.50/1M input + $1.50/1M output tokens
├── Whisper: $0.006 per minute
└── DALL-E 3: $0.04-0.12 per image
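For a sense of scale: at the Vision API rates above, 200,000 label-detection requests in a month would cost roughly (200,000 − 1,000) × $1.50 / 1,000 ≈ $298.50, whereas the same volume handled by an on-device model adds nothing to the monthly bill.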
Cost Optimization
Strategies:
├── Use on-device for common operations
├── Cache API responses (see the sketch after this list)
├── Batch requests when possible
├── Use cheaper models for simple tasks
├── Implement rate limiting
├── Optimize image/audio size before upload
└── Monitor usage and set alerts
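As a minimal illustration of the caching strategy above, a small in-memory cache keyed by request payload (the TTL and key format are arbitrary; persistent storage such as AsyncStorage could back it instead):
const responseCache = new Map();
const CACHE_TTL_MS = 10 * 60 * 1000; // keep entries for 10 minutes

async function cachedApiCall(key, makeRequest) {
  const cached = responseCache.get(key);
  if (cached && Date.now() - cached.timestamp < CACHE_TTL_MS) {
    return cached.value; // served from cache: no network call, no API charge
  }
  const value = await makeRequest();
  responseCache.set(key, { value, timestamp: Date.now() });
  return value;
}

// Usage: repeated identical requests within the TTL hit the cache
// const sentiment = await cachedApiCall(`sentiment:${text}`, () => analyzeSentiment(text));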
Conclusion
AI/ML integration in mobile apps has become accessible and practical:
- Start simple: Use pre-trained models and cloud APIs
- Consider privacy: Prefer on-device when possible
- Optimize for mobile: Size and performance matter
- Choose wisely: Cloud for complexity, on-device for privacy/speed
- Monitor costs: Cloud AI can get expensive at scale
The key is matching the right approach to your use case and constraints.
Need help integrating AI into your mobile app? Contact Hevcode for expert guidance on AI/ML implementation.