diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 137225d8..7788ed6d 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -43,7 +43,8 @@ android:exported="true" android:label="@string/app_name" android:theme="@style/Theme.Emptything" - android:launchMode="singleTop"> + android:launchMode="singleTop" + android:windowSoftInputMode="adjustResize"> diff --git a/app/src/main/kotlin/com/google/ai/sample/MainActivity.kt b/app/src/main/kotlin/com/google/ai/sample/MainActivity.kt index 10b02dc9..0e513edc 100644 --- a/app/src/main/kotlin/com/google/ai/sample/MainActivity.kt +++ b/app/src/main/kotlin/com/google/ai/sample/MainActivity.kt @@ -112,6 +112,13 @@ import okhttp3.Request import androidx.lifecycle.ViewModelProvider import androidx.lifecycle.ViewModelStoreOwner import com.google.ai.sample.GenerativeViewModelFactory +import androidx.activity.result.PickVisualMediaRequest +import android.graphics.drawable.BitmapDrawable +import android.media.MediaMetadataRetriever +import coil.ImageLoader +import coil.request.ImageRequest +import coil.request.SuccessResult +import coil.size.Precision class MainActivity : ComponentActivity() { @@ -143,6 +150,7 @@ class MainActivity : ComponentActivity() { private lateinit var mediaProjectionManager: MediaProjectionManager private lateinit var mediaProjectionLauncher: ActivityResultLauncher private lateinit var webRtcMediaProjectionLauncher: ActivityResultLauncher + private lateinit var pickMediaLauncher: ActivityResultLauncher private var currentScreenInfoForScreenshot: String? 
= null @@ -255,6 +263,16 @@ class MainActivity : ComponentActivity() { ) { result -> handleWebRtcMediaProjectionResult(result.resultCode, result.data) } + + pickMediaLauncher = registerForActivityResult(ActivityResultContracts.PickVisualMedia()) { uri -> + uri?.let { + Log.d(TAG, "Selected image/video URI from picker: $it") + val isVideo = contentResolver.getType(it)?.startsWith("video/") == true + webViewInstance?.post { + webViewInstance?.evaluateJavascript("window.onImagePicked('$it', $isVideo)", null) + } + } + } } private fun handleMediaProjectionResult(resultCode: Int, resultData: Intent?) { @@ -519,6 +537,17 @@ class MainActivity : ComponentActivity() { TAG, "updateTrialState: trialInfoMessage='${uiModel.infoMessage}', showTrialInfoDialog=${uiModel.shouldShowInfoDialog}" ) + + // Notify the WebView so JS can update its UI (e.g. hide the Pro button after purchase). + val isExpired = newState == TrialManager.TrialState.EXPIRED_INTERNET_TIME_CONFIRMED + val isPurchased = newState == TrialManager.TrialState.PURCHASED + val escapedMsg = escapeForJs(uiModel.infoMessage) + webViewInstance?.post { + webViewInstance?.evaluateJavascript( + "window.onTrialStateChanged && window.onTrialStateChanged($isExpired, $isPurchased, '$escapedMsg')", + null + ) + } } private val purchasesUpdatedListener = PurchasesUpdatedListener { billingResult, purchases -> @@ -594,7 +623,7 @@ class MainActivity : ComponentActivity() { private fun loadWebViewContent() { if (webViewHtmlContent != null) return - val htmlUrl = "https://raw.githubusercontent.com/Android-PowerUser/ScreenOperator/refs/heads/main/index.html" + val htmlUrl = "https://raw.githubusercontent.com/Android-PowerUser/ScreenOperator/refs/heads/feature/webview-test/index.html" lifecycleScope.launch(Dispatchers.IO) { if (webViewHtmlContent != null) return@launch try { @@ -692,6 +721,19 @@ class MainActivity : ComponentActivity() { GenerativeAISample { Scaffold { innerPadding -> val htmlContent = webViewHtmlContent + // ── 
Dialogs: always rendered so they float above WebView too ────────── + TrialStateDialogs( + trialState = currentTrialState, + showTrialInfoDialog = showTrialInfoDialog, + trialInfoMessage = trialInfoMessage, + onDismissTrialInfo = { + showTrialInfoDialog = false + prefs.edit().putBoolean(PREF_KEY_FIRST_LAUNCH_INFO_SHOWN, true).apply() + }, + onPurchaseClick = { initiateDonationPurchase() } + ) + // ───────────────────────────────────────────────────────────────────── + if (htmlContent != null) { Log.d(TAG, "setContent: Remote content available, showing WebView.") AndroidView( @@ -703,8 +745,8 @@ class MainActivity : ComponentActivity() { settings.javaScriptEnabled = true settings.domStorageEnabled = true settings.databaseEnabled = false - settings.allowFileAccess = false - settings.allowContentAccess = false + settings.allowFileAccess = true + settings.allowContentAccess = true settings.mixedContentMode = WebSettings.MIXED_CONTENT_NEVER_ALLOW settings.setSupportZoom(true) settings.builtInZoomControls = true @@ -722,6 +764,17 @@ class MainActivity : ComponentActivity() { override fun onPageFinished(view: WebView?, url: String?) { super.onPageFinished(view, url) Log.d(TAG, "WebView page rendered: {}".format(url)) + view?.post { + view.evaluateJavascript("window.onAndroidReady && window.onAndroidReady()", null) + // Push the current trial state so JS can update its UI on first load. 
+ val isExpired = currentTrialState == TrialManager.TrialState.EXPIRED_INTERNET_TIME_CONFIRMED + val isPurchased = currentTrialState == TrialManager.TrialState.PURCHASED + val escapedMsg = escapeForJs(trialInfoMessage) + view.evaluateJavascript( + "window.onTrialStateChanged && window.onTrialStateChanged($isExpired, $isPurchased, '$escapedMsg')", + null + ) + } observeViewModelForWebView() } @@ -738,7 +791,7 @@ class MainActivity : ComponentActivity() { this@MainActivity.webViewInstance = this addJavascriptInterface(WebViewBridge(this@MainActivity), "Android") loadDataWithBaseURL( - "https://raw.githubusercontent.com/Android-PowerUser/ScreenOperator/refs/heads/main/", + "https://raw.githubusercontent.com/Android-PowerUser/ScreenOperator/refs/heads/feature/webview-test/", htmlContent, "text/html", "UTF-8", @@ -752,17 +805,6 @@ class MainActivity : ComponentActivity() { navController = rememberNavController() AppNavigation(navController = navController, innerPadding = innerPadding) - TrialStateDialogs( - trialState = currentTrialState, - showTrialInfoDialog = showTrialInfoDialog, - trialInfoMessage = trialInfoMessage, - onDismissTrialInfo = { - showTrialInfoDialog = false - prefs.edit().putBoolean(PREF_KEY_FIRST_LAUNCH_INFO_SHOWN, true).apply() - }, - onPurchaseClick = { initiateDonationPurchase() } - ) - if (showFirstLaunchInfoDialog) { FirstLaunchInfoDialog(onDismiss = { showFirstLaunchInfoDialog = false @@ -1289,15 +1331,75 @@ class MainActivity : ComponentActivity() { /** * Called by [WebViewBridge] when the user sends a chat message from the WebView UI. - * The WebView UI currently doesn't support attaching images, so this is always called - * with an empty image list. + * Supports passing a list of media URIs selected via the + button. 
*/ - fun sendMessageFromWebView(text: String) { - Log.d(TAG, "sendMessageFromWebView called.") - photoReasoningViewModel?.reason( - userInput = text, - selectedImages = emptyList() - ) + fun sendMessageFromWebView(text: String, selectedImages: List) { + Log.d(TAG, "sendMessageFromWebView called with ${selectedImages.size} images.") + lifecycleScope.launch { + val bitmaps = selectedImages.mapNotNull { uri -> + uriToBitmap(uri) + } + photoReasoningViewModel?.reason( + userInput = text, + selectedImages = bitmaps, + screenInfoForPrompt = null, + imageUrisForChat = selectedImages.map { it.toString() } + ) + } + } + + private suspend fun uriToBitmap(uri: Uri): Bitmap? = withContext(Dispatchers.IO) { + val mimeType = contentResolver.getType(uri).orEmpty() + if (mimeType.startsWith("video/")) { + return@withContext extractVideoFrame(uri) + } + + val imageLoader = ImageLoader.Builder(this@MainActivity).build() + val imageRequest = ImageRequest.Builder(this@MainActivity) + .data(uri) + .precision(Precision.EXACT) + .build() + return@withContext try { + val result = imageLoader.execute(imageRequest) + if (result is SuccessResult) (result.drawable as? BitmapDrawable)?.bitmap else null + } catch (e: Exception) { + null + } + } + + private fun extractVideoFrame(uri: Uri): Bitmap? { + val retriever = MediaMetadataRetriever() + return try { + retriever.setDataSource(this, uri) + retriever.getFrameAtTime(0, MediaMetadataRetriever.OPTION_CLOSEST_SYNC) + } catch (e: Exception) { + Log.e(TAG, "Error extracting video frame for URI: $uri", e) + null + } finally { + retriever.release() + } + } + + fun openImagePicker() { + Log.d(TAG, "openImagePicker called via Bridge.") + pickMediaLauncher.launch(PickVisualMediaRequest(ActivityResultContracts.PickVisualMedia.ImageAndVideo)) + } + + override fun onBackPressed() { + val wv = webViewInstance + // Wenn wir nicht im WebView-Inhalt sind (htmlContent == null), nutzen wir standard back. 
+ // Wenn WebView aktiv ist, fragen wir JS ob es ein "back" innerhalb der UI gibt. + if (wv != null && wv.visibility == View.VISIBLE) { + wv.evaluateJavascript("window.onBackPressed && window.onBackPressed()") { result -> + // JS gibt "true" zurück wenn es den Event konsumiert hat, sonst "false" oder "null" + val cleanedResult = result?.replace("\"", "")?.trim() + if (cleanedResult != "true") { + runOnUiThread { super.onBackPressed() } + } + } + } else { + super.onBackPressed() + } } /** @@ -1305,8 +1407,8 @@ class MainActivity : ComponentActivity() { * purchase from the WebView UI. */ fun initiateDonationFromWebView() { - Log.d(TAG, "initiateDonationFromWebView called.") - initiateDonationPurchase() + Log.d(TAG, "initiateDonationFromWebView called. Launching Google Play billing directly (PaymentMethodDialog lives in the non-WebView branch).") + launchGooglePlayBilling() } /** @@ -1316,6 +1418,12 @@ class MainActivity : ComponentActivity() { fun setTermuxBackgroundFromWebView(background: Boolean) { Log.d(TAG, "setTermuxBackgroundFromWebView called with background=$background") TermuxExecutionModePreferences.setExecuteInBackground(this, background) + val toastMessage = if (background) { + "Termux commands are executed in the background" + } else { + "Termux commands are executed in the foreground" + } + Toast.makeText(this, toastMessage, Toast.LENGTH_SHORT).show() } /** @@ -1368,6 +1476,39 @@ class MainActivity : ComponentActivity() { } } } + lifecycleScope.launch { + vm.systemMessage.collect { msg -> + wv.post { + wv.evaluateJavascript("window.onSystemMessageChanged && window.onSystemMessageChanged('${escapeForJs(msg)}')", null) + } + } + } + lifecycleScope.launch { + vm.customModelRequestEvents.collect { payloadJson -> + val escaped = escapeForJs(payloadJson) + wv.post { + wv.evaluateJavascript("window.onCustomModelRequest && window.onCustomModelRequest('$escaped')", null) + } + } + } + } + + /** + * Called by [WebViewBridge] with a streaming chunk (accumulated 
text so far) of a custom, + * fully JSON-defined model's response (see [com.google.ai.sample.util.CustomModelRegistry]). + */ + fun customModelPartialResponseFromWebView(text: String) { + photoReasoningViewModel?.onCustomModelPartialResponse(text) + } + + /** Called by [WebViewBridge] with the final, complete response text of a custom model's turn. */ + fun customModelFinalResponseFromWebView(text: String) { + photoReasoningViewModel?.onCustomModelFinalResponse(text) + } + + /** Called by [WebViewBridge] when a custom model's turn failed in JavaScript. */ + fun customModelErrorFromWebView(message: String) { + photoReasoningViewModel?.onCustomModelError(message) } private fun registerNetworkCallback() { diff --git a/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt b/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt index 444e2069..97277ffb 100644 --- a/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt +++ b/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt @@ -37,5 +37,23 @@ class PhotoReasoningApplication : Application() { super.onCreate() instance = this Log.d(TAG, "Application created") + + // Re-apply any command pattern overrides that were previously received from the + // WebView bundle, so alternate command syntax for new models keeps working even + // before the WebView has re-fetched/re-applied its config in this session. + com.google.ai.sample.util.CommandPatternOverridesPreferences.load(this)?.let { savedJson -> + val applied = com.google.ai.sample.util.CommandParser.setRemotePatternOverrides(savedJson) + Log.d(TAG, "Restored $applied command pattern override(s) from preferences") + } + + // Re-apply any custom (fully JSON-defined, JS-driven) model definitions and the + // previously active selection, so a custom model keeps working across app restarts. 
+ com.google.ai.sample.util.CustomModelPreferences.loadModelsJson(this)?.let { savedJson -> + val installed = com.google.ai.sample.util.CustomModelRegistry.setModels(savedJson) + Log.d(TAG, "Restored $installed custom model definition(s) from preferences") + } + com.google.ai.sample.util.CustomModelPreferences.loadActiveModelId(this)?.let { savedId -> + com.google.ai.sample.util.CustomModelRegistry.setActiveModelId(savedId) + } } } diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt index 0c3af802..8279a657 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt @@ -445,7 +445,13 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { val captureAndRequestScreenshot = { val currentModel = GenerativeAiViewModelFactory.getCurrentModel() - if (!currentModel.supportsScreenshot || onlyTermuxContext) { + // A custom (JSON-defined) model, if active, overrides the stale native ModelOption's + // flag here - otherwise the autonomous screenshot loop would silently never send + // real screenshots to a custom vision model (it would fall back to text-only screen + // info every time, regardless of "supportsScreenshot" in custom-models.json). 
+ val effectiveSupportsScreenshot = com.google.ai.sample.util.CustomModelRegistry.getActiveModel() + ?.supportsScreenshot ?: currentModel.supportsScreenshot + if (!effectiveSupportsScreenshot || onlyTermuxContext) { Log.d(TAG, "Command.TakeScreenshot: Model has no screenshot support, capturing screen info only.") showToast("Capturing screen info...", false) val screenInfo = buildScreenInfoPayload(captureScreenInformation()) diff --git a/app/src/main/kotlin/com/google/ai/sample/WebViewBridge.kt b/app/src/main/kotlin/com/google/ai/sample/WebViewBridge.kt index b07334a0..2fd2a2ce 100644 --- a/app/src/main/kotlin/com/google/ai/sample/WebViewBridge.kt +++ b/app/src/main/kotlin/com/google/ai/sample/WebViewBridge.kt @@ -6,6 +6,9 @@ import android.webkit.JavascriptInterface import android.webkit.WebView import com.google.ai.sample.feature.multimodal.PhotoReasoningUiState import com.google.ai.sample.util.GenerationSettingsPreferences +import com.google.ai.sample.util.SystemMessageEntry +import com.google.ai.sample.util.SystemMessageEntryPreferences +import com.google.ai.sample.util.SystemMessagePreferences import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch @@ -21,23 +24,36 @@ class WebViewBridge(private val mainActivity: MainActivity) { @JavascriptInterface fun getSystemMessage(): String { - val vm = mainActivity.getPhotoReasoningViewModel() - if (vm != null) return vm.systemMessage.value - return context.getSharedPreferences(PREFS_WEBVIEW, Context.MODE_PRIVATE) - .getString(KEY_SYS_MSG, "") ?: "" + val viewModel = mainActivity.getPhotoReasoningViewModel() + val currentMessage = viewModel?.systemMessage?.value ?: "" + + // If system message is empty and ViewModel is not initialized, load from preferences + if (currentMessage.isEmpty() && (viewModel?.isInitialized?.value == false)) { + val savedMessage = SystemMessagePreferences.loadSystemMessage(context) + Log.d(TAG, "getSystemMessage: Loading from preferences 
because ViewModel not initialized. Length: ${savedMessage.length}") + return savedMessage + } + + return currentMessage } @JavascriptInterface fun setSystemMessage(message: String) { - context.getSharedPreferences(PREFS_WEBVIEW, Context.MODE_PRIVATE) - .edit().putString(KEY_SYS_MSG, message).apply() mainActivity.getPhotoReasoningViewModel()?.updateSystemMessage(message, context) } + @JavascriptInterface + fun restoreSystemMessage() { + mainActivity.runOnUiThread { + mainActivity.getPhotoReasoningViewModel()?.restoreSystemMessage(context) + } + } + // ── Model Selection ─────────────────────────────────────────────────────── @JavascriptInterface fun getSelectedModelId(): String { + com.google.ai.sample.util.CustomModelRegistry.getActiveModelId()?.let { return it } return GenerativeAiViewModelFactory.getCurrentModel().name } @@ -45,12 +61,25 @@ class WebViewBridge(private val mainActivity: MainActivity) { fun setSelectedModel(id: String) { try { val model = ModelOption.valueOf(id) + com.google.ai.sample.util.CustomModelRegistry.clearActiveModel() + com.google.ai.sample.util.CustomModelPreferences.saveActiveModelId(context, null) GenerativeAiViewModelFactory.setModel(model, context) mainActivity.runOnUiThread { mainActivity.onModelChangedFromWebView() } } catch (e: IllegalArgumentException) { - Log.w(TAG, "setSelectedModel: unknown model id '$id'") + // Not a built-in ModelOption - check whether it's a custom, JSON-defined model + // (see CustomModelRegistry). This is what lets a brand-new model/provider be + // selected without it ever having existed as a compiled-in enum constant. 
+ val activated = com.google.ai.sample.util.CustomModelRegistry.setActiveModelId(id) + if (activated) { + com.google.ai.sample.util.CustomModelPreferences.saveActiveModelId(context, id) + mainActivity.runOnUiThread { + mainActivity.getPhotoReasoningViewModel()?.closeOfflineModel() + } + } else { + Log.w(TAG, "setSelectedModel: unknown model id '$id' (not a ModelOption nor a known custom model)") + } } } @@ -112,44 +141,33 @@ class WebViewBridge(private val mainActivity: MainActivity) { @JavascriptInterface fun getDatabaseEntries(): String { - val prefs = context.getSharedPreferences(PREFS_WEBVIEW_DB, Context.MODE_PRIVATE) - return prefs.getString(KEY_DB_ENTRIES, "[]") ?: "[]" + val entries = SystemMessageEntryPreferences.loadEntries(context) + val arr = JSONArray() + entries.forEach { + arr.put(JSONObject().put("title", it.title).put("guide", it.guide)) + } + return arr.toString() } @JavascriptInterface fun addDatabaseEntry(title: String, guide: String) { - val prefs = context.getSharedPreferences(PREFS_WEBVIEW_DB, Context.MODE_PRIVATE) - val arr = JSONArray(prefs.getString(KEY_DB_ENTRIES, "[]") ?: "[]") - arr.put(JSONObject().put("title", title).put("guide", guide)) - prefs.edit().putString(KEY_DB_ENTRIES, arr.toString()).apply() + SystemMessageEntryPreferences.addEntry(context, SystemMessageEntry(title, guide)) } @JavascriptInterface fun updateDatabaseEntry(oldTitle: String, newTitle: String, guide: String) { - val prefs = context.getSharedPreferences(PREFS_WEBVIEW_DB, Context.MODE_PRIVATE) - val arr = JSONArray(prefs.getString(KEY_DB_ENTRIES, "[]") ?: "[]") - val newArr = JSONArray() - for (i in 0 until arr.length()) { - val obj = arr.getJSONObject(i) - if (obj.getString("title") == oldTitle) { - newArr.put(JSONObject().put("title", newTitle).put("guide", guide)) - } else { - newArr.put(obj) - } + val oldEntry = SystemMessageEntryPreferences.loadEntries(context).find { it.title == oldTitle } + if (oldEntry != null) { + 
SystemMessageEntryPreferences.updateEntry(context, oldEntry, SystemMessageEntry(newTitle, guide)) } - prefs.edit().putString(KEY_DB_ENTRIES, newArr.toString()).apply() } @JavascriptInterface fun deleteDatabaseEntry(title: String) { - val prefs = context.getSharedPreferences(PREFS_WEBVIEW_DB, Context.MODE_PRIVATE) - val arr = JSONArray(prefs.getString(KEY_DB_ENTRIES, "[]") ?: "[]") - val newArr = JSONArray() - for (i in 0 until arr.length()) { - val obj = arr.getJSONObject(i) - if (obj.getString("title") != title) newArr.put(obj) + val entry = SystemMessageEntryPreferences.loadEntries(context).find { it.title == title } + if (entry != null) { + SystemMessageEntryPreferences.deleteEntry(context, entry) } - prefs.edit().putString(KEY_DB_ENTRIES, newArr.toString()).apply() } // ── Generation Settings ─────────────────────────────────────────────────── @@ -157,8 +175,17 @@ class WebViewBridge(private val mainActivity: MainActivity) { @JavascriptInterface fun getGenerationSettings(modelId: String): String { return try { - val model = ModelOption.valueOf(modelId) - val s = GenerationSettingsPreferences.loadSettings(context, model.modelName) + // Resolve to the persistence key the same way regardless of whether this is a + // built-in ModelOption or a custom (JSON-defined) model: GenerationSettingsPreferences + // itself is already keyed by an arbitrary string, not by the ModelOption enum, so no + // new storage mechanism is needed here - only this id-resolution step. 
+ val settingsKey = try { + ModelOption.valueOf(modelId).modelName + } catch (e: IllegalArgumentException) { + com.google.ai.sample.util.CustomModelRegistry.findById(modelId)?.id + ?: throw e + } + val s = GenerationSettingsPreferences.loadSettings(context, settingsKey) JSONObject() .put("temperature", s.temperature) .put("topP", s.topP) @@ -173,10 +200,15 @@ class WebViewBridge(private val mainActivity: MainActivity) { @JavascriptInterface fun saveGenerationSettings(modelId: String, temperature: Float, topP: Float, topK: Int) { try { - val model = ModelOption.valueOf(modelId) + val settingsKey = try { + ModelOption.valueOf(modelId).modelName + } catch (e: IllegalArgumentException) { + com.google.ai.sample.util.CustomModelRegistry.findById(modelId)?.id + ?: throw e + } GenerationSettingsPreferences.saveSettings( context, - model.modelName, + settingsKey, GenerationSettingsPreferences.GenerationSettings(temperature, topP, topK) ) } catch (e: Exception) { @@ -184,12 +216,63 @@ class WebViewBridge(private val mainActivity: MainActivity) { } } + // ── Custom Models (entirely JSON-defined, JS-driven - see CustomModelRegistry) ────────── + // A "custom model" never existed as a compiled ModelOption. Its API call is made by JS + // itself (fetch()), not by native networking code, so adding one - even for a brand-new + // provider - needs only a custom-models.json commit, no app release. 
+ + @JavascriptInterface + fun setCustomModelOverrides(json: String): Int { + return try { + val installed = com.google.ai.sample.util.CustomModelRegistry.setModels(json) + com.google.ai.sample.util.CustomModelPreferences.saveModelsJson(context, json) + installed + } catch (e: Exception) { + Log.e(TAG, "setCustomModelOverrides error: ${e.message}") + 0 + } + } + + @JavascriptInterface + fun getCustomModelOverrides(): String { + return com.google.ai.sample.util.CustomModelPreferences.loadModelsJson(context) ?: "[]" + } + + @JavascriptInterface + fun setCustomModelApiKey(modelId: String, key: String) { + try { + com.google.ai.sample.util.CustomModelPreferences.saveApiKey(context, modelId, key) + } catch (e: Exception) { + Log.e(TAG, "setCustomModelApiKey error: ${e.message}") + } + } + + @JavascriptInterface + fun getCustomModelApiKey(modelId: String): String { + return com.google.ai.sample.util.CustomModelPreferences.loadApiKey(context, modelId) ?: "" + } + // ── Chat Operations ─────────────────────────────────────────────────────── @JavascriptInterface fun sendMessage(text: String) { mainActivity.runOnUiThread { - mainActivity.sendMessageFromWebView(text) + mainActivity.sendMessageFromWebView(text, emptyList()) + } + } + + @JavascriptInterface + fun sendMessageWithImages(text: String, urisCsv: String) { + val uris = urisCsv.split(",").filter { it.isNotBlank() }.map { android.net.Uri.parse(it) } + mainActivity.runOnUiThread { + mainActivity.sendMessageFromWebView(text, uris) + } + } + + @JavascriptInterface + fun pickImage() { + mainActivity.runOnUiThread { + mainActivity.openImagePicker() } } @@ -207,21 +290,41 @@ class WebViewBridge(private val mainActivity: MainActivity) { } } + // ── Custom Model Responses ─────────────────────────────────────────────── + // Called by JS after it performed the actual fetch() to a custom model's endpoint. 
The + // text is fed into the EXISTING, unmodified command-parsing/execution/persistence + // pipeline (PhotoReasoningCommandProcessing, AccessibilityCommandQueue, chat history) - + // only the network transport differs from a built-in ModelOption. + @JavascriptInterface - fun isGenerationRunning(): Boolean { - return mainActivity.getPhotoReasoningViewModel()?.isGenerationRunningFlow?.value ?: false + fun onCustomModelPartialResponse(text: String) { + mainActivity.runOnUiThread { + mainActivity.getPhotoReasoningViewModel()?.onCustomModelPartialResponse(text) + } } @JavascriptInterface - fun isOfflineModelLoaded(): Boolean { - return mainActivity.getPhotoReasoningViewModel()?.isOfflineGpuModelLoadedFlow?.value ?: false + fun onCustomModelFinalResponse(text: String) { + mainActivity.runOnUiThread { + mainActivity.getPhotoReasoningViewModel()?.onCustomModelFinalResponse(text) + } } - // ── Custom Models (no-op – section removed from HTML) ───────────────────── + @JavascriptInterface + fun onCustomModelError(message: String) { + mainActivity.runOnUiThread { + mainActivity.getPhotoReasoningViewModel()?.onCustomModelError(message) + } + } @JavascriptInterface - fun addCustomModel(json: String) { - Log.d(TAG, "addCustomModel called (no-op, section removed from UI): $json") + fun isGenerationRunning(): Boolean { + return mainActivity.getPhotoReasoningViewModel()?.isGenerationRunningFlow?.value ?: false + } + + @JavascriptInterface + fun isOfflineModelLoaded(): Boolean { + return mainActivity.getPhotoReasoningViewModel()?.isOfflineGpuModelLoadedFlow?.value ?: false } // ── Backend Preference ──────────────────────────────────────────────────── @@ -264,14 +367,37 @@ class WebViewBridge(private val mainActivity: MainActivity) { } } + @JavascriptInterface + fun getTermuxBackground(): Boolean { + return com.google.ai.sample.util.TermuxExecutionModePreferences.executeInBackground(context) + } + + // ── Command Pattern Overrides (remote-updatable command syntax) ──────────── + // 
Lets the WebView bundle teach the native command parser new/alternate ways to spell + // an *existing* action (see CommandPatternConfig for the safety boundary). This is what + // makes "a new model emits slightly different command syntax" fixable via a repo commit + // instead of an app release. + + @JavascriptInterface + fun setCommandPatternOverrides(json: String): Int { + return try { + val applied = com.google.ai.sample.util.CommandParser.setRemotePatternOverrides(json) + com.google.ai.sample.util.CommandPatternOverridesPreferences.save(context, json) + applied + } catch (e: Exception) { + Log.e(TAG, "setCommandPatternOverrides error: ${e.message}") + 0 + } + } + + @JavascriptInterface + fun getCommandPatternOverrides(): String { + return com.google.ai.sample.util.CommandPatternOverridesPreferences.load(context) ?: "[]" + } + // ── Helpers ─────────────────────────────────────────────────────────────── companion object { - private const val PREFS_WEBVIEW = "webview_prefs" - private const val PREFS_WEBVIEW_DB = "webview_db" - private const val KEY_SYS_MSG = "sysMsg" - private const val KEY_DB_ENTRIES = "entries" - fun jsEscape(s: String): String = s.replace("\\", "\\\\") .replace("'", "\\'") diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt index 1399dc43..ef02c3ed 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt @@ -43,6 +43,9 @@ import com.google.ai.sample.feature.multimodal.dtos.TempFilePathCollector import kotlinx.coroutines.Dispatchers import java.util.ArrayList // Required for StringArrayListExtra import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.MutableSharedFlow +import kotlinx.coroutines.flow.SharedFlow +import 
kotlinx.coroutines.flow.asSharedFlow import kotlinx.serialization.encodeToString import kotlinx.coroutines.Job import kotlinx.coroutines.flow.StateFlow @@ -118,6 +121,15 @@ class PhotoReasoningViewModel( private val _isInitializingOfflineModelFlow = MutableStateFlow(false) val isInitializingOfflineModelFlow: StateFlow = _isInitializingOfflineModelFlow.asStateFlow() + // Emits one JSON payload per turn that should be answered by a *custom*, fully JSON-defined + // model (see CustomModelRegistry). MainActivity collects this and hands the payload to + // window.onCustomModelRequest() in the WebView, which performs the actual network call + // (fetch()) and reports back via onCustomModelPartialResponse/onCustomModelFinalResponse/ + // onCustomModelError. Native code for every existing (ModelOption-based) model is + // untouched - this flow only ever emits when a custom model is the active selection. + private val _customModelRequestEvents = MutableSharedFlow(extraBufferCapacity = 4) + val customModelRequestEvents: SharedFlow = _customModelRequestEvents.asSharedFlow() + private val app: Application get() = getApplication() @@ -795,10 +807,19 @@ class PhotoReasoningViewModel( screenInfoForPrompt: String? = null, imageUrisForChat: List? = null ) { - val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel() clearStaleErrorState() stopExecutionFlag.set(false) + // A custom, fully JSON-defined model (added via custom-models.json, never compiled into + // ModelOption) is active: delegate the actual API call to JavaScript in the WebView + // instead of any native networking path. See CustomModelRegistry / reasonWithCustomJsModel. 
+ com.google.ai.sample.util.CustomModelRegistry.getActiveModel()?.let { customModel -> + reasonWithCustomJsModel(customModel, userInput, selectedImages, screenInfoForPrompt, imageUrisForChat) + return + } + + val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel() + // Check for Human Expert model if (currentModel == ModelOption.HUMAN_EXPERT) { // If we already have a specialized session running, maybe just send the text? @@ -1453,6 +1474,131 @@ class PhotoReasoningViewModel( } } + /** + * Handles a turn for a *custom*, fully JSON-defined model (see [CustomModelRegistry]). + * Unlike every other `reasonWith*` function, this never makes a network call itself - + * it only assembles everything JavaScript needs to make the call, and emits it via + * [customModelRequestEvents]. The actual HTTP request happens in the WebView's + * `window.onCustomModelRequest`, which then reports back through + * [onCustomModelPartialResponse] / [onCustomModelFinalResponse] / [onCustomModelError] - + * which feed into the exact same chat/command-processing pipeline every other model uses. + */ + private fun reasonWithCustomJsModel( + customModel: com.google.ai.sample.util.CustomModelDefinition, + userInput: String, + selectedImages: List, + screenInfoForPrompt: String?, + imageUrisForChat: List? 
+ ) { + val context = appContext + + val userMessageText = if (!screenInfoForPrompt.isNullOrBlank()) { + "$userInput\n\n$screenInfoForPrompt" + } else { + userInput + } + + val userMessage = PhotoReasoningMessage( + text = userMessageText, + participant = PhotoParticipant.USER, + imageUris = if (customModel.supportsScreenshot) (imageUrisForChat ?: emptyList()) else emptyList(), + isPending = false + ) + appendUserAndPendingModelMessages(userMessage) + + _uiState.value = PhotoReasoningUiState.Loading + resetStreamingCommandState() + + viewModelScope.launch(Dispatchers.IO) { + try { + val systemMessageText = _systemMessage.value + val formattedDbEntries = PhotoReasoningTextPolicies.formatDatabaseEntriesAsText(context) + + val allMessages = PhotoReasoningScreenElementHistoryPolicy.sanitizeMessages(_chatState.getAllMessages()) + // Exclude the pending AI message and the user message we just added above - + // both get sent separately/explicitly in the payload below. + val historyMessages = allMessages.filter { !it.isPending && it.participant != PhotoParticipant.ERROR }.dropLast(1) + + val historyJson = org.json.JSONArray() + historyMessages.forEach { message -> + val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant" + historyJson.put(org.json.JSONObject().put("role", role).put("text", message.text)) + } + + val imagesJson = org.json.JSONArray() + if (customModel.supportsScreenshot) { + for (bitmap in selectedImages) { + imagesJson.put(com.google.ai.sample.network.PuterApiClient.bitmapToBase64DataUri(bitmap)) + } + } + + val apiKey = com.google.ai.sample.util.CustomModelPreferences.loadApiKey(context, customModel.id) ?: "" + + // Same storage as every other model (GenerationSettingsPreferences is keyed by + // an arbitrary string, not by the ModelOption enum) - just keyed by the custom + // model's id instead of model.modelName. 
See WebViewBridge.getGenerationSettings/ + // saveGenerationSettings, which the WebView's existing settings UI already calls + // with this same id. + val genSettings = com.google.ai.sample.util.GenerationSettingsPreferences.loadSettings(context, customModel.id) + + val payload = org.json.JSONObject().apply { + put("modelId", customModel.id) + put("modelName", customModel.modelName) + put("endpoint", customModel.endpoint) + put("apiKeyHeader", customModel.apiKeyHeader) + put("apiKeyPrefix", customModel.apiKeyPrefix) + put("apiKey", apiKey) + put("stream", customModel.stream) + put("temperature", genSettings.temperature) + put("topP", genSettings.topP) + if (customModel.supportsTopK) { + put("topK", genSettings.topK) + } + put("systemMessage", systemMessageText) + put("databaseEntries", formattedDbEntries) + put("history", historyJson) + put("userText", userMessageText) + put("images", imagesJson) + } + + _customModelRequestEvents.emit(payload.toString()) + } catch (e: Exception) { + Log.e(TAG, "reasonWithCustomJsModel: failed to build request: ${e.message}", e) + withContext(Dispatchers.Main) { + onCustomModelError(e.message ?: "Unknown error building request") + } + } + } + } + + /** + * Reports a streaming chunk of a custom model's response (accumulated so far, not a delta). + * Mirrors exactly what every other model's streaming callback does + * (e.g. [reasonWithCerebras]'s `openAiStreamParser.parse` callback) so command execution, + * the chat bubble, etc. behave identically regardless of which model produced the text. + */ + fun onCustomModelPartialResponse(accumulatedText: String) { + if (!isTaskCompletedByAi) { + replaceAiMessageText(accumulatedText, isPending = true) + processCommandsIncrementally(accumulatedText) + } + } + + /** Reports the final, complete response text of a custom model's turn. 
*/ + fun onCustomModelFinalResponse(finalText: String) { + _uiState.value = PhotoReasoningUiState.Success(finalText) + finalizeAiMessage(finalText) + processCommands(finalText) + saveChatHistory(appContext) + } + + /** Reports that the custom model's turn failed (network error, non-2xx, bad JSON, ...). */ + fun onCustomModelError(message: String) { + _uiState.value = PhotoReasoningUiState.Error(message) + appendErrorMessage("Error: $message") + saveChatHistory(appContext) + } + private fun reasonWithPuter( userInput: String, selectedImages: List, @@ -2214,11 +2360,16 @@ class PhotoReasoningViewModel( } /** - * Restore the system message to its default value + * Restore the system message to its default value. + * + * The authoritative default now lives in index.html (DEFAULT_SYSTEM_MSG). + * Bridge.restoreSystemMessage() in JS calls Android.setSystemMessage(DEFAULT_SYSTEM_MSG) + * directly, so the full default text is immediately written back to prefs via + * updateSystemMessage(). This function is kept as a fallback; getDefaultSystemMessage() + * returns "" so calling it is effectively a no-op from the native side alone. 
*/ fun restoreSystemMessage(context: Context) { - val defaultMessage = SystemMessagePreferences.getDefaultSystemMessage() - updateSystemMessage(defaultMessage, context) + updateSystemMessage(SystemMessagePreferences.getDefaultSystemMessage(), context) } /** diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt index d1c458f6..1ec5e1bf 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt @@ -8,12 +8,19 @@ import android.util.Log object CommandParser { private const val TAG = "CommandParser" private val SINGLE_INSTANCE_COMMAND_TYPES = setOf( - CommandTypeEnum.TAKE_SCREENSHOT, - CommandTypeEnum.COMPLETED + CommandType.TAKE_SCREENSHOT, + CommandType.COMPLETED ) - // Enum to represent different command types - private enum class CommandTypeEnum { + /** + * Enum representing the different *kinds* of commands the app knows how to execute. + * + * This is intentionally public: [CommandPatternConfig] uses it to validate remotely + * supplied pattern overrides against a fixed whitelist, so that remote config can only + * ever attach a new regular expression to an action that already exists in compiled + * code - never introduce a brand-new kind of action. 
+ */ + enum class CommandType { CLICK_BUTTON, LONG_CLICK_BUTTON, TAP_COORDINATES, TAKE_SCREENSHOT, COMPLETED, WAIT, PRESS_HOME, PRESS_BACK, SHOW_RECENT_APPS, SCROLL_DOWN, SCROLL_UP, SCROLL_LEFT, SCROLL_RIGHT, SCROLL_DOWN_FROM_COORDINATES, SCROLL_UP_FROM_COORDINATES, @@ -27,72 +34,116 @@ object CommandParser { val id: String, // For debugging val regex: Regex, val commandBuilder: (MatchResult) -> Command, - val commandType: CommandTypeEnum // Used for single-instance command check + val commandType: CommandType // Used for single-instance command check ) private data class ProcessedMatch( val startIndex: Int, val endIndex: Int, val command: Command, - val commandType: CommandTypeEnum + val commandType: CommandType ) // Master list of all patterns private val ALL_PATTERNS: List = listOf( // Enter key patterns - PatternInfo("enterKey1", Regex("(?i)\\benter\\(\\)"), { Command.PressEnterKey }, CommandTypeEnum.PRESS_ENTER_KEY), + PatternInfo("enterKey1", Regex("(?i)\\benter\\(\\)"), { Command.PressEnterKey }, CommandType.PRESS_ENTER_KEY), // Model selection patterns - PatternInfo("highReasoning1", Regex("(?i)\\bhighReasoningModel\\(\\)"), { Command.UseHighReasoningModel }, CommandTypeEnum.USE_HIGH_REASONING_MODEL), - PatternInfo("lowReasoning1", Regex("(?i)\\blowReasoningModel\\(\\)"), { Command.UseLowReasoningModel }, CommandTypeEnum.USE_LOW_REASONING_MODEL), + PatternInfo("highReasoning1", Regex("(?i)\\bhighReasoningModel\\(\\)"), { Command.UseHighReasoningModel }, CommandType.USE_HIGH_REASONING_MODEL), + PatternInfo("lowReasoning1", Regex("(?i)\\blowReasoningModel\\(\\)"), { Command.UseLowReasoningModel }, CommandType.USE_LOW_REASONING_MODEL), // Write text patterns - PatternInfo("writeText1", Regex("(?i)\\bwriteText\\([\"']([^\"']+)[\"']\\)"), { match -> Command.WriteText(match.groupValues[1]) }, CommandTypeEnum.WRITE_TEXT), - PatternInfo("termux1", Regex("""(?i)\bTermux\(\s*(["'])((?:\\.|(?!\1\s*\)).)*)\1\s*\)"""), { match -> 
Command.TermuxCommand(match.groupValues[2]) }, CommandTypeEnum.TERMUX_COMMAND), + PatternInfo("writeText1", Regex("(?i)\\bwriteText\\([\"']([^\"']+)[\"']\\)"), { match -> Command.WriteText(match.groupValues[1]) }, CommandType.WRITE_TEXT), + PatternInfo("termux1", Regex("""(?i)\bTermux\(\s*(["'])((?:\\.|(?!\1\s*\)).)*)\1\s*\)"""), { match -> Command.TermuxCommand(match.groupValues[2]) }, CommandType.TERMUX_COMMAND), // Click (long) button patterns - PatternInfo("clickBtn1", Regex("(?i)\\bclick\\([\"']([^\"']+)[\"']"), { match -> Command.ClickButton(match.groupValues[1]) }, CommandTypeEnum.CLICK_BUTTON), - PatternInfo("longClickBtn1", Regex("(?i)\\blongClick\\([\"']([^\"']+)[\"']"), { match -> Command.LongClickButton(match.groupValues[1]) }, CommandTypeEnum.LONG_CLICK_BUTTON), + PatternInfo("clickBtn1", Regex("(?i)\\bclick\\([\"']([^\"']+)[\"']"), { match -> Command.ClickButton(match.groupValues[1]) }, CommandType.CLICK_BUTTON), + PatternInfo("longClickBtn1", Regex("(?i)\\blongClick\\([\"']([^\"']+)[\"']"), { match -> Command.LongClickButton(match.groupValues[1]) }, CommandType.LONG_CLICK_BUTTON), // Tap coordinates patterns - PatternInfo("tapCoords1", Regex("(?i)\\btapAtCoordinates\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)"), { match -> Command.TapCoordinates(match.groupValues[1], match.groupValues[2]) }, CommandTypeEnum.TAP_COORDINATES), + PatternInfo("tapCoords1", Regex("(?i)\\btapAtCoordinates\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)"), { match -> Command.TapCoordinates(match.groupValues[1], match.groupValues[2]) }, CommandType.TAP_COORDINATES), // Screenshot, completion and wait patterns - PatternInfo("screenshot1", Regex("(?i)\\btakeScreenshot\\(\\)"), { Command.TakeScreenshot }, CommandTypeEnum.TAKE_SCREENSHOT), - PatternInfo("completed1", Regex("(?i)\\bcompleted\\(\\)"), { Command.Completed }, CommandTypeEnum.COMPLETED), - PatternInfo("wait1", Regex("(?i)\\bWait\\(\\s*(\\d+)\\s*\\)"), { match -> Command.Wait(match.groupValues[1].toLong()) }, 
CommandTypeEnum.WAIT), + PatternInfo("screenshot1", Regex("(?i)\\btakeScreenshot\\(\\)"), { Command.TakeScreenshot }, CommandType.TAKE_SCREENSHOT), + PatternInfo("completed1", Regex("(?i)\\bcompleted\\(\\)"), { Command.Completed }, CommandType.COMPLETED), + PatternInfo("wait1", Regex("(?i)\\bWait\\(\\s*(\\d+)\\s*\\)"), { match -> Command.Wait(match.groupValues[1].toLong()) }, CommandType.WAIT), // Home button patterns - PatternInfo("home1", Regex("(?i)\\bhome\\(\\)"), { Command.PressHomeButton }, CommandTypeEnum.PRESS_HOME), + PatternInfo("home1", Regex("(?i)\\bhome\\(\\)"), { Command.PressHomeButton }, CommandType.PRESS_HOME), // Back button patterns - PatternInfo("back1", Regex("(?i)\\bback\\(\\)"), { Command.PressBackButton }, CommandTypeEnum.PRESS_BACK), + PatternInfo("back1", Regex("(?i)\\bback\\(\\)"), { Command.PressBackButton }, CommandType.PRESS_BACK), // Recent apps patterns - PatternInfo("recentApps1", Regex("(?i)\\brecentApps\\(\\)"), { Command.ShowRecentApps }, CommandTypeEnum.SHOW_RECENT_APPS), + PatternInfo("recentApps1", Regex("(?i)\\brecentApps\\(\\)"), { Command.ShowRecentApps }, CommandType.SHOW_RECENT_APPS), // Scroll patterns (simple) - PatternInfo("scrollDown1", Regex("(?i)\\bscrollDown\\(\\)"), { Command.ScrollDown }, CommandTypeEnum.SCROLL_DOWN), - PatternInfo("scrollUp1", Regex("(?i)\\bscrollUp\\(\\)"), { Command.ScrollUp }, CommandTypeEnum.SCROLL_UP), - PatternInfo("scrollLeft1", Regex("(?i)\\bscrollLeft\\(\\)"), { Command.ScrollLeft }, CommandTypeEnum.SCROLL_LEFT), - PatternInfo("scrollRight1", Regex("(?i)\\bscrollRight\\(\\)"), { Command.ScrollRight }, CommandTypeEnum.SCROLL_RIGHT), + PatternInfo("scrollDown1", Regex("(?i)\\bscrollDown\\(\\)"), { Command.ScrollDown }, CommandType.SCROLL_DOWN), + PatternInfo("scrollUp1", Regex("(?i)\\bscrollUp\\(\\)"), { Command.ScrollUp }, CommandType.SCROLL_UP), + PatternInfo("scrollLeft1", Regex("(?i)\\bscrollLeft\\(\\)"), { Command.ScrollLeft }, CommandType.SCROLL_LEFT), + PatternInfo("scrollRight1", 
Regex("(?i)\\bscrollRight\\(\\)"), { Command.ScrollRight }, CommandType.SCROLL_RIGHT), // Scroll from coordinates patterns PatternInfo("scrollDownCoords", Regex("(?i)\\bscrollDown\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)"), - { match -> Command.ScrollDownFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) }, CommandTypeEnum.SCROLL_DOWN_FROM_COORDINATES), + { match -> Command.ScrollDownFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) }, CommandType.SCROLL_DOWN_FROM_COORDINATES), PatternInfo("scrollUpCoords", Regex("(?i)\\bscrollUp\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)"), - { match -> Command.ScrollUpFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) }, CommandTypeEnum.SCROLL_UP_FROM_COORDINATES), + { match -> Command.ScrollUpFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) }, CommandType.SCROLL_UP_FROM_COORDINATES), PatternInfo("scrollLeftCoords", Regex("(?i)\\bscrollLeft\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)"), - { match -> Command.ScrollLeftFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) }, CommandTypeEnum.SCROLL_LEFT_FROM_COORDINATES), + { match -> Command.ScrollLeftFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) }, CommandType.SCROLL_LEFT_FROM_COORDINATES), PatternInfo("scrollRightCoords", Regex("(?i)\\bscrollRight\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)"), - { match -> Command.ScrollRightFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) 
}, CommandTypeEnum.SCROLL_RIGHT_FROM_COORDINATES), + { match -> Command.ScrollRightFromCoordinates(match.groupValues[1], match.groupValues[2], match.groupValues[3], match.groupValues[4].toLong()) }, CommandType.SCROLL_RIGHT_FROM_COORDINATES), // Open app patterns - PatternInfo("openApp1", Regex("(?i)\\bopenApp\\([\"']([^\"']+)[\"']\\)"), { match -> Command.OpenApp(match.groupValues[1]) }, CommandTypeEnum.OPEN_APP), + PatternInfo("openApp1", Regex("(?i)\\bopenApp\\([\"']([^\"']+)[\"']\\)"), { match -> Command.OpenApp(match.groupValues[1]) }, CommandType.OPEN_APP), // Retrieve information patterns - PatternInfo("retrieve1", Regex("(?i)\\bretrieve\\([\"']([^\"']+)[\"']\\)"), { match -> Command.Retrieve(match.groupValues[1]) }, CommandTypeEnum.RETRIEVE) + PatternInfo("retrieve1", Regex("(?i)\\bretrieve\\([\"']([^\"']+)[\"']\\)"), { match -> Command.Retrieve(match.groupValues[1]) }, CommandType.RETRIEVE) ) + // One canonical command-builder per CommandType, derived from ALL_PATTERNS above. + // CommandPatternConfig uses this to attach remotely supplied regexes to the existing, + // compiled-in command-construction logic - it can never introduce custom logic of its own. + private val BUILDER_BY_TYPE: Map Command> by lazy { + ALL_PATTERNS.associate { it.commandType to it.commandBuilder } + } + + // Additional patterns supplied at runtime (e.g. fetched together with the WebView bundle) + // so that a new model's slightly different command syntax can be recognized without an + // app update. See [CommandPatternConfig] for the safety boundary this respects. Empty by + // default, i.e. behavior is unchanged unless overrides are explicitly installed. + @Volatile + private var remotePatterns: List = emptyList() + + /** + * Installs additional command-recognition patterns from a remotely supplied JSON config. 
+ * Each entry may only reference an existing [CommandType]; unknown types or invalid + * regexes are skipped (logged) rather than causing a crash, so a malformed remote config + * degrades gracefully to "no extra patterns". + * + * @return the number of overrides that were successfully installed. + */ + @Synchronized + fun setRemotePatternOverrides(json: String): Int { + val parsed = CommandPatternConfig.parse(json) + remotePatterns = parsed.mapNotNull { override -> + val builder = BUILDER_BY_TYPE[override.commandType] + if (builder == null) { + Log.w(TAG, "Skipping remote pattern override '${override.id}': no builder for ${override.commandType}") + null + } else { + PatternInfo(override.id, override.regex, builder, override.commandType) + } + } + Log.d(TAG, "Installed ${remotePatterns.size} remote command pattern override(s)") + return remotePatterns.size + } + + /** Removes all remotely installed pattern overrides, reverting to built-in patterns only. */ + @Synchronized + fun clearRemotePatternOverrides() { + remotePatterns = emptyList() + } + // Buffer for storing partial text between calls private var textBuffer = "" @@ -184,7 +235,7 @@ object CommandParser { private fun processTextInternal(text: String): List { val foundRawMatches = collectRawMatches(text) val finalCommands = mutableListOf() - val addedSingleInstanceCommands = mutableSetOf() + val addedSingleInstanceCommands = mutableSetOf() // Sort matches by start index foundRawMatches.sortBy { it.startIndex } @@ -213,7 +264,7 @@ object CommandParser { private fun collectRawMatches(text: String): MutableList { val foundRawMatches = mutableListOf() - for (patternInfo in ALL_PATTERNS) { + for (patternInfo in ALL_PATTERNS + remotePatterns) { try { patternInfo.regex.findAll(text).forEach { matchResult -> try { diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CommandPatternConfig.kt b/app/src/main/kotlin/com/google/ai/sample/util/CommandPatternConfig.kt new file mode 100644 index 00000000..143274d4 --- 
/dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/util/CommandPatternConfig.kt @@ -0,0 +1,85 @@ +package com.google.ai.sample.util + +import android.util.Log +import org.json.JSONArray + +/** + * Allows the set of recognized command *syntaxes* to be extended at runtime from a remotely + * fetched JSON config (e.g. shipped alongside the WebView's index.html on the + * feature/webview-test branch), without requiring a new app release. + * + * Example payload (a JSON array, e.g. "command-patterns.json" next to index.html): + * ```json + * [ + * { "id": "clickBtnCapitalized", "commandType": "CLICK_BUTTON", "regex": "(?i)\\bClick\\([\"']([^\"']+)[\"']" } + * ] + * ``` + * + * IMPORTANT (safety boundary): an override can only attach a *new regular expression* to an + * *already existing* [CommandParser.CommandType]. It can never introduce a new kind of action, + * and it can never run arbitrary code - the actual [Command] that gets built (and therefore + * everything that is allowed to happen on the device) is still produced by the same, + * compiled-in builder function that ships with the app for that command type. This means a new + * model that simply phrases an existing action differently (e.g. "Click('...')" instead of + * "click(\"...\")") can be supported purely by editing a JSON file in the repo - while what each + * action is actually allowed to do stays fixed in native code and unrelated to this mechanism. + */ +internal object CommandPatternConfig { + private const val TAG = "CommandPatternConfig" + + data class ParsedOverride( + val id: String, + val commandType: CommandParser.CommandType, + val regex: Regex + ) + + /** + * Parses a JSON array of pattern overrides. Any entry that is malformed, references an + * unknown command type, or contains an invalid regex is skipped (and logged) instead of + * throwing, so a bad remote config degrades to "no extra patterns" rather than crashing + * the app or blocking recognition of built-in patterns. 
+ */ + fun parse(json: String): List { + val result = mutableListOf() + if (json.isBlank()) return result + + try { + val array = JSONArray(json) + for (i in 0 until array.length()) { + val entry = array.optJSONObject(i) + if (entry == null) { + Log.w(TAG, "Skipping override at index $i: not a JSON object") + continue + } + + val id = entry.optString("id", "remote_$i") + val typeName = entry.optString("commandType", "") + val pattern = entry.optString("regex", "") + + if (pattern.isBlank()) { + Log.w(TAG, "Skipping override '$id': empty/missing regex") + continue + } + + val commandType = try { + CommandParser.CommandType.valueOf(typeName) + } catch (e: IllegalArgumentException) { + Log.w(TAG, "Skipping override '$id': unknown commandType '$typeName'") + continue + } + + val regex = try { + Regex(pattern) + } catch (e: Exception) { + Log.w(TAG, "Skipping override '$id': invalid regex '$pattern' (${e.message})") + continue + } + + result.add(ParsedOverride(id, commandType, regex)) + } + } catch (e: Exception) { + Log.e(TAG, "Failed to parse remote command pattern overrides: ${e.message}", e) + } + return result + } +} diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CommandPatternOverridesPreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/CommandPatternOverridesPreferences.kt new file mode 100644 index 00000000..b03ef6cd --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/util/CommandPatternOverridesPreferences.kt @@ -0,0 +1,38 @@ +package com.google.ai.sample.util + +import android.content.Context +import android.util.Log +import androidx.core.content.edit + +/** + * Persists the most recently received remote command-pattern override JSON (see + * [CommandPatternConfig] / [CommandParser.setRemotePatternOverrides]) so that recognition of + * additional/alternate command syntaxes keeps working across app restarts - including before + * the WebView bundle has re-fetched and re-applied it for the current session. 
+ */ +object CommandPatternOverridesPreferences { + private const val TAG = "CmdPatternOverridesPrefs" + private const val PREFS_NAME = "command_pattern_overrides_prefs" + private const val KEY_JSON = "overrides_json" + + private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) + + /** Saves the raw override JSON as last received from the WebView/remote bundle. */ + fun save(context: Context, json: String) { + try { + prefs(context).edit { putString(KEY_JSON, json) } + } catch (e: Exception) { + Log.e(TAG, "Error saving command pattern overrides: ${e.message}", e) + } + } + + /** Loads the last saved override JSON, or null if none has been received yet. */ + fun load(context: Context): String? { + return try { + prefs(context).getString(KEY_JSON, null) + } catch (e: Exception) { + Log.e(TAG, "Error loading command pattern overrides: ${e.message}", e) + null + } + } +} diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CustomModelConfig.kt b/app/src/main/kotlin/com/google/ai/sample/util/CustomModelConfig.kt new file mode 100644 index 00000000..4e3eb450 --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/util/CustomModelConfig.kt @@ -0,0 +1,82 @@ +package com.google.ai.sample.util + +import android.util.Log +import org.json.JSONArray + +/** + * A model that does not exist as a compiled-in [com.google.ai.sample.ModelOption] at all. + * Its definition - which endpoint to call, what the request looks like, whether it sends + * screenshots - comes entirely from remotely fetched JSON (see [CustomModelConfig]). + * + * The actual HTTP call for these models is made from JavaScript inside the WebView (see + * `window.onCustomModelRequest` in index.html), not from native networking code. 
That is what + * lets a genuinely new model/provider be added with zero app release, as long as its API is an + * OpenAI-compatible chat-completions endpoint reachable via `fetch()` from the WebView (CORS + * permitting - this must be verified per provider). + */ +data class CustomModelDefinition( + val id: String, + val displayName: String, + val endpoint: String, + val modelName: String, + val apiKeyHeader: String = "Authorization", + val apiKeyPrefix: String = "Bearer ", + val supportsScreenshot: Boolean = false, + val supportsTopK: Boolean = false, + val stream: Boolean = true +) + +/** + * Parses the optional `custom-models.json` file (fetched by the WebView next to index.html) + * into a list of [CustomModelDefinition]. Malformed entries are skipped (logged) rather than + * thrown, so a bad config degrades to "no custom models" instead of crashing the app. + */ +internal object CustomModelConfig { + private const val TAG = "CustomModelConfig" + + fun parse(json: String): List { + val result = mutableListOf() + if (json.isBlank()) return result + + try { + val array = JSONArray(json) + for (i in 0 until array.length()) { + val entry = array.optJSONObject(i) + if (entry == null) { + Log.w(TAG, "Skipping custom model at index $i: not a JSON object") + continue + } + + val id = entry.optString("id", "") + val endpoint = entry.optString("endpoint", "") + val modelName = entry.optString("modelName", "") + + if (id.isBlank() || endpoint.isBlank() || modelName.isBlank()) { + Log.w(TAG, "Skipping custom model at index $i: 'id', 'endpoint' and 'modelName' are required") + continue + } + if (!endpoint.startsWith("https://")) { + Log.w(TAG, "Skipping custom model '$id': endpoint must be https://") + continue + } + + result.add( + CustomModelDefinition( + id = id, + displayName = entry.optString("displayName", id), + endpoint = endpoint, + modelName = modelName, + apiKeyHeader = entry.optString("apiKeyHeader", "Authorization"), + apiKeyPrefix = 
entry.optString("apiKeyPrefix", "Bearer "), + supportsScreenshot = entry.optBoolean("supportsScreenshot", false), + supportsTopK = entry.optBoolean("supportsTopK", false), + stream = entry.optBoolean("stream", true) + ) + ) + } + } catch (e: Exception) { + Log.e(TAG, "Failed to parse custom-models.json: ${e.message}", e) + } + return result + } +} diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CustomModelPreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/CustomModelPreferences.kt new file mode 100644 index 00000000..d5d146d8 --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/util/CustomModelPreferences.kt @@ -0,0 +1,75 @@ +package com.google.ai.sample.util + +import android.content.Context +import android.util.Log +import androidx.core.content.edit + +/** + * Persists everything [CustomModelRegistry] needs across app restarts: + * - the last received `custom-models.json` (so definitions survive before the WebView re-fetches it) + * - which custom model (if any) was last selected + * - a per-model API key, since custom models aren't tied to the existing [com.google.ai.sample.ApiProvider] + * enum/[com.google.ai.sample.ApiKeyManager] storage + */ +object CustomModelPreferences { + private const val TAG = "CustomModelPreferences" + private const val PREFS_NAME = "custom_model_prefs" + private const val KEY_MODELS_JSON = "models_json" + private const val KEY_ACTIVE_MODEL_ID = "active_model_id" + private const val KEY_API_KEY_PREFIX = "api_key_" + + private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) + + fun saveModelsJson(context: Context, json: String) { + try { + prefs(context).edit { putString(KEY_MODELS_JSON, json) } + } catch (e: Exception) { + Log.e(TAG, "Error saving custom models json: ${e.message}", e) + } + } + + fun loadModelsJson(context: Context): String? 
{ + return try { + prefs(context).getString(KEY_MODELS_JSON, null) + } catch (e: Exception) { + Log.e(TAG, "Error loading custom models json: ${e.message}", e) + null + } + } + + fun saveActiveModelId(context: Context, id: String?) { + try { + prefs(context).edit { + if (id == null) remove(KEY_ACTIVE_MODEL_ID) else putString(KEY_ACTIVE_MODEL_ID, id) + } + } catch (e: Exception) { + Log.e(TAG, "Error saving active custom model id: ${e.message}", e) + } + } + + fun loadActiveModelId(context: Context): String? { + return try { + prefs(context).getString(KEY_ACTIVE_MODEL_ID, null) + } catch (e: Exception) { + Log.e(TAG, "Error loading active custom model id: ${e.message}", e) + null + } + } + + fun saveApiKey(context: Context, modelId: String, key: String) { + try { + prefs(context).edit { putString(KEY_API_KEY_PREFIX + modelId, key) } + } catch (e: Exception) { + Log.e(TAG, "Error saving api key for custom model '$modelId': ${e.message}", e) + } + } + + fun loadApiKey(context: Context, modelId: String): String? { + return try { + prefs(context).getString(KEY_API_KEY_PREFIX + modelId, null) + } catch (e: Exception) { + Log.e(TAG, "Error loading api key for custom model '$modelId': ${e.message}", e) + null + } + } +} diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CustomModelRegistry.kt b/app/src/main/kotlin/com/google/ai/sample/util/CustomModelRegistry.kt new file mode 100644 index 00000000..c67bd0ab --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/util/CustomModelRegistry.kt @@ -0,0 +1,62 @@ +package com.google.ai.sample.util + +import android.util.Log + +/** + * Holds the currently known [CustomModelDefinition]s and which one (if any) is selected. + * + * This is intentionally kept completely separate from [com.google.ai.sample.ModelOption] / + * [com.google.ai.sample.GenerativeAiViewModelFactory]: it does not touch the existing, + * compiled-in model enum or its dispatch logic at all. 
A custom model is either active (and + * then [PhotoReasoningViewModel]'s `reason()` delegates the actual API call to JavaScript) or + * it isn't, in which case the app behaves exactly as before this feature existed. + */ +object CustomModelRegistry { + private const val TAG = "CustomModelRegistry" + + @Volatile + private var models: List = emptyList() + + @Volatile + private var activeModelId: String? = null + + /** Replaces the known custom models from a remote JSON config. Returns how many were installed. */ + @Synchronized + fun setModels(json: String): Int { + val parsed = CustomModelConfig.parse(json) + models = parsed + // If the previously active model no longer exists in the new config, deactivate it + // rather than silently keep routing to a stale definition. + if (activeModelId != null && parsed.none { it.id == activeModelId }) { + Log.w(TAG, "Previously active custom model '$activeModelId' is no longer in config; deactivating") + activeModelId = null + } + Log.d(TAG, "Installed ${models.size} custom model definition(s)") + return models.size + } + + fun getModels(): List = models + + fun findById(id: String): CustomModelDefinition? = models.find { it.id == id } + + /** @return true if [id] matches a known custom model and was activated. */ + @Synchronized + fun setActiveModelId(id: String): Boolean { + val found = findById(id) + return if (found != null) { + activeModelId = id + true + } else { + false + } + } + + @Synchronized + fun clearActiveModel() { + activeModelId = null + } + + fun getActiveModel(): CustomModelDefinition? = activeModelId?.let { findById(it) } + + fun getActiveModelId(): String? 
= activeModelId +} diff --git a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt index f816fd83..2282ee37 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt @@ -5,45 +5,26 @@ import android.util.Log import androidx.core.content.edit /** - * Utility class to manage system message persistence + * Utility class to manage system message persistence. + * + * The DEFAULT system message is intentionally NOT stored here – it lives in + * index.html (DEFAULT_SYSTEM_MSG) so it can be updated via a web bundle change + * without an app release. When [loadSystemMessage] returns an empty string the + * caller (WebViewBridge / JS) falls back to the HTML-defined default. */ object SystemMessagePreferences { private const val TAG = "SystemMessagePrefs" private const val PREFS_NAME = "system_message_prefs" private const val KEY_SYSTEM_MESSAGE = "system_message" - private const val KEY_FIRST_START_COMPLETED = "first_start_completed" // New flag - private val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! 
-You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "Wait(seconds)" - -"Termux("command")" -1. You don't need to open Termux because a run.command intent is being called. -2. Each call to Termux("command") starts a new session. To prevent this, you must first write Termux("tmux new-session -A -s main") and then pass all subsequent commands with Termux("tmux send-keys -t main "command" Enter"). - -3. Do not use wait(seconds) commands when using Termux. - -To write text, click the textfield, thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. -If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. - - -Say "completed()" when the task is finished. - - -Notes: -1. Don't write the commands if you're just planing about it or messaging me. - - -2. 
If you have questions, open Screen Operator, ask your question(s), and use "completed()" until you receive an human response. - -3. After each message, you will see the screen with additional information about it.""".trimIndent() private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) /** - * Save system message to SharedPreferences + * Save system message to SharedPreferences. */ fun saveSystemMessage(context: Context, message: String) { try { - Log.d(TAG, "Saving system message: $message") + Log.d(TAG, "Saving system message (length=${message.length})") prefs(context).edit { putString(KEY_SYSTEM_MESSAGE, message) } } catch (e: Exception) { Log.e(TAG, "Error saving system message: ${e.message}", e) @@ -52,36 +33,26 @@ Notes: /** * Load system message from SharedPreferences. - * On first start, it loads a default message, saves it, and marks first start as completed. + * + * Returns the user-saved message, or an empty string when nothing has been + * saved yet. An empty string is the signal for the WebView layer to use + * its own DEFAULT_SYSTEM_MSG constant (defined in index.html), keeping the + * authoritative default in one place – the HTML bundle. */ fun loadSystemMessage(context: Context): String { - try { - val sharedPreferences = prefs(context) - val isFirstStartCompleted = sharedPreferences.getBoolean(KEY_FIRST_START_COMPLETED, false) - - if (!isFirstStartCompleted) { - Log.d(TAG, "First start detected. 
Loading and saving default system message.") - sharedPreferences.edit { - putString(KEY_SYSTEM_MESSAGE, DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START) - putBoolean(KEY_FIRST_START_COMPLETED, true) - } - Log.d(TAG, "Loaded default system message: $DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START") - return DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START - } else { - val message = sharedPreferences.getString(KEY_SYSTEM_MESSAGE, "") ?: "" - Log.d(TAG, "Loaded system message from prefs: $message") - return message - } + return try { + val message = prefs(context).getString(KEY_SYSTEM_MESSAGE, "") ?: "" + Log.d(TAG, "Loaded system message from prefs (length=${message.length})") + message } catch (e: Exception) { Log.e(TAG, "Error loading system message: ${e.message}", e) - return "" // Return empty string in case of error, consistent with original behavior + "" } } /** - * Get the default system message. + * Returns an empty string – the default is now owned by the WebView's DEFAULT_SYSTEM_MSG + * in index.html. Kept for source compatibility with callers that may still reference it. 
*/ - fun getDefaultSystemMessage(): String { - return DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START - } + fun getDefaultSystemMessage(): String = "" } diff --git a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt index 91fe6447..c193f99b 100644 --- a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt +++ b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt @@ -10,6 +10,7 @@ class CommandParserTest { @Before fun setUp() { CommandParser.clearBuffer() + CommandParser.clearRemotePatternOverrides() } @Test @@ -123,4 +124,50 @@ class CommandParserTest { assertEquals("su -c \"ifconfig\"", (command as Command.TermuxCommand).command) } + @Test + fun setRemotePatternOverrides_recognizesAlternateSyntaxForExistingCommandType() { + // A hypothetical new model emits "Click('...')" (capitalized, single quotes) instead + // of the built-in "click(\"...\")" syntax. Without an override, this is NOT recognized: + val before = CommandParser.parseCommands("Click('Login')", clearBuffer = true) + assertEquals(0, before.size) + + val applied = CommandParser.setRemotePatternOverrides( + """[{"id":"clickAlt","commandType":"CLICK_BUTTON","regex":"(?i)\\bClick\\([\"']([^\"']+)[\"']"}]""" + ) + assertEquals(1, applied) + + val after = CommandParser.parseCommands("Click('Login')", clearBuffer = true) + assertEquals(1, after.size) + assertTrue(after.first() is Command.ClickButton) + assertEquals("Login", (after.first() as Command.ClickButton).buttonText) + } + + @Test + fun setRemotePatternOverrides_skipsUnknownCommandType() { + val applied = CommandParser.setRemotePatternOverrides( + """[{"id":"bogus","commandType":"DOES_NOT_EXIST","regex":"(?i)\\bfoo\\(\\)"}]""" + ) + assertEquals(0, applied) + } + + @Test + fun setRemotePatternOverrides_skipsInvalidRegexWithoutCrashing() { + val applied = CommandParser.setRemotePatternOverrides( + 
"""[{"id":"badRegex","commandType":"CLICK_BUTTON","regex":"("}]""" + ) + assertEquals(0, applied) + } + + @Test + fun clearRemotePatternOverrides_revertsToBuiltInPatternsOnly() { + CommandParser.setRemotePatternOverrides( + """[{"id":"clickAlt","commandType":"CLICK_BUTTON","regex":"(?i)\\bClick\\([\"']([^\"']+)[\"']"}]""" + ) + assertEquals(1, CommandParser.parseCommands("Click('Login')", clearBuffer = true).size) + + CommandParser.clearRemotePatternOverrides() + + assertEquals(0, CommandParser.parseCommands("Click('Login')", clearBuffer = true).size) + } + } diff --git a/app/src/test/java/com/google/ai/sample/util/CustomModelConfigTest.kt b/app/src/test/java/com/google/ai/sample/util/CustomModelConfigTest.kt new file mode 100644 index 00000000..b28b93e2 --- /dev/null +++ b/app/src/test/java/com/google/ai/sample/util/CustomModelConfigTest.kt @@ -0,0 +1,86 @@ +package com.google.ai.sample.util + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Test + +class CustomModelConfigTest { + + @Test + fun parse_validEntry_appliesDefaultsAndFields() { + val json = """ + [{"id":"MY_MODEL","displayName":"My Model","endpoint":"https://api.example.com/v1/chat/completions","modelName":"example/model"}] + """.trimIndent() + + val result = CustomModelConfig.parse(json) + + assertEquals(1, result.size) + val def = result.first() + assertEquals("MY_MODEL", def.id) + assertEquals("My Model", def.displayName) + assertEquals("https://api.example.com/v1/chat/completions", def.endpoint) + assertEquals("example/model", def.modelName) + // Defaults + assertEquals("Authorization", def.apiKeyHeader) + assertEquals("Bearer ", def.apiKeyPrefix) + assertEquals(false, def.supportsScreenshot) + assertEquals(false, def.supportsTopK) + assertEquals(true, def.stream) + } + + @Test + fun parse_explicitFieldsOverrideDefaults() { + val json = """ + [{ + "id":"M2","endpoint":"https://api.example.com/x","modelName":"m", + 
"apiKeyHeader":"x-api-key","apiKeyPrefix":"", + "supportsScreenshot":true,"supportsTopK":true,"stream":false + }] + """.trimIndent() + + val def = CustomModelConfig.parse(json).first() + + assertEquals("x-api-key", def.apiKeyHeader) + assertEquals("", def.apiKeyPrefix) + assertTrue(def.supportsScreenshot) + assertTrue(def.supportsTopK) + assertEquals(false, def.stream) + // displayName defaults to id when omitted + assertEquals("M2", def.displayName) + } + + @Test + fun parse_skipsEntryMissingRequiredFields() { + val json = """[{"id":"NO_ENDPOINT","modelName":"m"}]""" + assertEquals(0, CustomModelConfig.parse(json).size) + } + + @Test + fun parse_skipsNonHttpsEndpoint() { + val json = """[{"id":"INSECURE","endpoint":"http://api.example.com/x","modelName":"m"}]""" + assertEquals(0, CustomModelConfig.parse(json).size) + } + + @Test + fun parse_skipsMalformedEntryWithoutCrashing() { + assertEquals(0, CustomModelConfig.parse("[1, 2, 3]").size) + assertEquals(0, CustomModelConfig.parse("not json at all").size) + assertEquals(0, CustomModelConfig.parse("").size) + } + + @Test + fun parse_multipleEntriesOnlyValidOnesKept() { + val json = """ + [ + {"id":"GOOD","endpoint":"https://api.example.com/a","modelName":"m1"}, + {"id":"BAD"}, + {"id":"GOOD2","endpoint":"https://api.example.com/b","modelName":"m2"} + ] + """.trimIndent() + + val result = CustomModelConfig.parse(json) + assertEquals(2, result.size) + assertEquals(listOf("GOOD", "GOOD2"), result.map { it.id }) + } +} diff --git a/app/src/test/java/com/google/ai/sample/util/CustomModelRegistryTest.kt b/app/src/test/java/com/google/ai/sample/util/CustomModelRegistryTest.kt new file mode 100644 index 00000000..e31f7259 --- /dev/null +++ b/app/src/test/java/com/google/ai/sample/util/CustomModelRegistryTest.kt @@ -0,0 +1,94 @@ +package com.google.ai.sample.util + +import org.junit.After +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNull +import 
org.junit.Assert.assertTrue +import org.junit.Test + +class CustomModelRegistryTest { + + private val sampleJson = """ + [ + {"id":"MODEL_A","endpoint":"https://api.example.com/a","modelName":"a"}, + {"id":"MODEL_B","endpoint":"https://api.example.com/b","modelName":"b"} + ] + """.trimIndent() + + @After + fun tearDown() { + // CustomModelRegistry is a singleton object - reset its state so tests don't bleed + // into each other. + CustomModelRegistry.setModels("[]") + CustomModelRegistry.clearActiveModel() + } + + @Test + fun setModels_installsAllValidEntries() { + val count = CustomModelRegistry.setModels(sampleJson) + assertEquals(2, count) + assertEquals(listOf("MODEL_A", "MODEL_B"), CustomModelRegistry.getModels().map { it.id }) + } + + @Test + fun findById_returnsDefinitionOrNull() { + CustomModelRegistry.setModels(sampleJson) + assertEquals("MODEL_A", CustomModelRegistry.findById("MODEL_A")?.id) + assertNull(CustomModelRegistry.findById("DOES_NOT_EXIST")) + } + + @Test + fun setActiveModelId_succeedsForKnownModel() { + CustomModelRegistry.setModels(sampleJson) + val activated = CustomModelRegistry.setActiveModelId("MODEL_B") + assertTrue(activated) + assertEquals("MODEL_B", CustomModelRegistry.getActiveModelId()) + assertEquals("MODEL_B", CustomModelRegistry.getActiveModel()?.id) + } + + @Test + fun setActiveModelId_failsForUnknownModel() { + CustomModelRegistry.setModels(sampleJson) + val activated = CustomModelRegistry.setActiveModelId("NOT_A_CUSTOM_MODEL") + assertFalse(activated) + assertNull(CustomModelRegistry.getActiveModelId()) + } + + @Test + fun clearActiveModel_deactivates() { + CustomModelRegistry.setModels(sampleJson) + CustomModelRegistry.setActiveModelId("MODEL_A") + CustomModelRegistry.clearActiveModel() + assertNull(CustomModelRegistry.getActiveModel()) + assertNull(CustomModelRegistry.getActiveModelId()) + } + + @Test + fun setModels_deactivatesActiveModelIfRemovedFromNewConfig() { + CustomModelRegistry.setModels(sampleJson) + 
CustomModelRegistry.setActiveModelId("MODEL_A") + assertEquals("MODEL_A", CustomModelRegistry.getActiveModelId()) + + // New config no longer contains MODEL_A + CustomModelRegistry.setModels("""[{"id":"MODEL_C","endpoint":"https://api.example.com/c","modelName":"c"}]""") + + assertNull(CustomModelRegistry.getActiveModelId()) + } + + @Test + fun setModels_keepsActiveModelIfStillPresentInNewConfig() { + CustomModelRegistry.setModels(sampleJson) + CustomModelRegistry.setActiveModelId("MODEL_A") + + // Re-apply a config that still contains MODEL_A (e.g. unrelated entry added) + CustomModelRegistry.setModels( + """[ + {"id":"MODEL_A","endpoint":"https://api.example.com/a","modelName":"a"}, + {"id":"MODEL_D","endpoint":"https://api.example.com/d","modelName":"d"} + ]""" + ) + + assertEquals("MODEL_A", CustomModelRegistry.getActiveModelId()) + } +} diff --git a/command-patterns.json b/command-patterns.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/command-patterns.json @@ -0,0 +1 @@ +[] diff --git a/custom-models.json b/custom-models.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/custom-models.json @@ -0,0 +1 @@ +[] diff --git a/docs/command-pattern-overrides.md b/docs/command-pattern-overrides.md new file mode 100644 index 00000000..7b9bd502 --- /dev/null +++ b/docs/command-pattern-overrides.md @@ -0,0 +1,64 @@ +# Command pattern overrides (remote-updatable command syntax) + +`CommandParser.kt` recognizes the action commands an AI model emits (`click("...")`, +`tapAtCoordinates(x, y)`, `scrollDown()`, ...) using a fixed set of built-in regular +expressions. Until now, supporting a new model that phrases an existing action slightly +differently (e.g. `Click('...')` instead of `click("...")`) required patching +`CommandParser.kt` and shipping a new app release. + +`command-patterns.json` (this file, fetched by the WebView relative to `index.html`) lets +you add such alternate spellings without an app release. 
It is optional — if the file is +missing or invalid, the app silently falls back to the built-in patterns only. + +## Format + +A JSON array of override objects: + +```json +[ + { + "id": "clickBtnCapitalized", + "commandType": "CLICK_BUTTON", + "regex": "(?i)\\bClick\\([\"']([^\"']+)[\"']" + } +] +``` + +- `id` — any string, used only for logging. +- `commandType` — must be one of the values below. Unknown values are skipped (logged), + not an error. +- `regex` — a Kotlin/Java regular expression. **It must capture the same groups, in the + same order, as the built-in pattern for that `commandType`** (see `CommandParser.kt`), + since the existing, compiled-in builder function reads `match.groupValues[...]` to + construct the command. If the group count doesn't match, that particular match is + skipped (logged), nothing crashes. + +## Safety boundary + +An override can only attach a new regex to an **existing** `commandType` — it can never +introduce a new kind of action and can never run custom code. What each action is allowed +to do on the device (tap, scroll, open an app, run a Termux command, ...) is always +decided by the same native, compiled-in code; this mechanism only changes which *text* +triggers that pre-existing action. Adding a genuinely new action still requires a native +code change. + +## Available `commandType` values + +`CLICK_BUTTON`, `LONG_CLICK_BUTTON`, `TAP_COORDINATES`, `TAKE_SCREENSHOT`, `COMPLETED`, +`WAIT`, `PRESS_HOME`, `PRESS_BACK`, `SHOW_RECENT_APPS`, `SCROLL_DOWN`, `SCROLL_UP`, +`SCROLL_LEFT`, `SCROLL_RIGHT`, `SCROLL_DOWN_FROM_COORDINATES`, `SCROLL_UP_FROM_COORDINATES`, +`SCROLL_LEFT_FROM_COORDINATES`, `SCROLL_RIGHT_FROM_COORDINATES`, `OPEN_APP`, `WRITE_TEXT`, +`USE_HIGH_REASONING_MODEL`, `USE_LOW_REASONING_MODEL`, `PRESS_ENTER_KEY`, `RETRIEVE`, +`TERMUX_COMMAND`. + +## How it gets applied + +1. The WebView loads `index.html` and fires `window.onAndroidReady()`. +2. 
That handler fetches `command-patterns.json` (relative to the WebView's base URL) and + passes its raw text to `Android.setCommandPatternOverrides(json)`. +3. The native `CommandParser` installs the parsed overrides and the bridge persists the + raw JSON via `CommandPatternOverridesPreferences`, so it's restored on the next app + start (in `PhotoReasoningApplication.onCreate()`) even before the WebView reloads it. + +To add support for a new model's command syntax: edit `command-patterns.json` in this +repo and commit — no new app version needed. diff --git a/docs/custom-models.md b/docs/custom-models.md new file mode 100644 index 00000000..33b66f05 --- /dev/null +++ b/docs/custom-models.md @@ -0,0 +1,150 @@ +# Custom models (genuinely new models/providers via JSON, no app release) + +Every model that ships with the app is a compiled-in `ModelOption` (see +`GenerativeAiViewModelFactory.kt`), and the native Kotlin networking code for each provider +(Puter, Mistral, Groq, Cerebras, ...) is part of the APK. Adding a *new provider*, or a model +whose request/response shape doesn't match an existing provider's code, normally requires a +Kotlin change and a new app release. + +`custom-models.json` (this file, fetched by the WebView relative to `index.html`) lets you add +such a model anyway, with zero app release - **as long as its API is an OpenAI-compatible +chat-completions endpoint reachable via `fetch()` from the WebView (i.e. it supports CORS for +browser-style requests - verify this per provider; if it doesn't, this mechanism can't be used +for it).** + +The key architectural difference from every built-in model: the actual HTTP request is made by +JavaScript, directly in the WebView (`window.onCustomModelRequest` in `index.html`), not by +native networking code. Native code only assembles the context (system message, history, user +text, images) and hands it to JS; JS calls the provider and reports the result back. 
+ +## Format + +A JSON array of model definitions: + +```json +[ + { + "id": "MY_NEW_MODEL", + "displayName": "My New Model", + "endpoint": "https://api.example.com/v1/chat/completions", + "modelName": "example/my-model-name", + "apiKeyHeader": "Authorization", + "apiKeyPrefix": "Bearer ", + "supportsScreenshot": true, + "supportsTopK": false, + "stream": true + } +] +``` + +- `id` (required) - any unique string. Becomes the model's identity everywhere (selection, + per-model API key storage, ...). Must not collide with a built-in `ModelOption` name. +- `displayName` - shown in the model picker. Defaults to `id`. +- `endpoint` (required) - must be `https://`. +- `modelName` (required) - sent as the `model` field in the request body. +- `apiKeyHeader` / `apiKeyPrefix` - how the API key is attached, e.g. the defaults produce an + `Authorization: Bearer <key>` header. Some providers use a different header (e.g. `x-api-key` + with no prefix) - set these accordingly. +- `supportsScreenshot` - if true, the current screenshot(s) are included as + `image_url` content parts (base64 data URIs), OpenAI vision-style. +- `supportsTopK` - whether the Top-K slider is shown for this model; `top_k` is sent in the + request only when this is true (see "Generation settings" below). +- `stream` - whether to expect Server-Sent-Events streaming (`data: {...}` lines, + OpenAI-style `choices[0].delta.content`) or a single JSON response + (`choices[0].message.content`). + +## Setting the API key + +Custom models are not tied to the existing per-provider API key storage +(`ApiKeyManager`/`ApiProvider`), since they aren't a fixed enum. Use the bridge directly (e.g. +from a small addition to the API-key UI, or from the browser console while developing): + +```js +Bridge.setCustomModelApiKey('MY_NEW_MODEL', 'sk-...'); +``` + +The key is stored locally on-device (`CustomModelPreferences`) and is **not** part of +`custom-models.json` - never put real secrets in the repo. + +## How it gets applied + +1.
The WebView fetches `custom-models.json` on `window.onAndroidReady()`, merges the entries + into the model picker, and pushes the raw JSON to + `Android.setCustomModelOverrides(json)`. +2. Selecting a custom model calls `Android.setSelectedModel(id)` as usual; since `id` isn't a + `ModelOption`, the bridge falls back to `CustomModelRegistry` and activates it there instead. +3. On the next turn, `PhotoReasoningViewModel.reason()` sees a custom model is active and calls + `reasonWithCustomJsModel(...)`, which builds a JSON payload (system message, db entries, + history, user text, images, endpoint/auth config) and emits it on + `customModelRequestEvents`. +4. `MainActivity` forwards that payload to `window.onCustomModelRequest(payloadJson)` in the + WebView, which performs the actual `fetch()` call and streams results back via + `Bridge.onCustomModelPartialResponse` / `onCustomModelFinalResponse` / `onCustomModelError`. +5. Those bridge calls feed into the exact same chat-bubble/command-processing/chat-history + pipeline every other model already uses (`replaceAiMessageText`, `processCommandsIncrementally`, + `finalizeAiMessage`, `processCommands`, `saveChatHistory`) - command syntax recognition, + accessibility execution, and persistence all behave identically regardless of which model + produced the text. +6. The selection (and the model list) is persisted via `CustomModelPreferences`, so it survives + app restarts even before the WebView re-fetches `custom-models.json`. + +## Generation settings (temperature / top-p / top-k) + +Persisted exactly the same way as for every built-in model: `GenerationSettingsPreferences` +is keyed by an arbitrary string, not by the `ModelOption` enum, so it already worked for any +id - `WebViewBridge.getGenerationSettings`/`saveGenerationSettings` just needed to resolve a +custom model's `id` instead of requiring `ModelOption.valueOf()` to succeed. 
The existing +settings UI (sliders) work unchanged; `reasonWithCustomJsModel` loads the saved values and +sends them as `temperature`/`top_p` (and `top_k`, only if `supportsTopK` is true) in the +request body. + +## Images + +Handled the same way as for every built-in model: the current turn's screenshot(s)/attached +image(s) are JPEG-compressed and base64-encoded (same `PuterApiClient.bitmapToBase64DataUri` +helper every other model uses) and sent as OpenAI-style `image_url` content parts - only if +`supportsScreenshot` is true. Like every other model, only the *current* turn's image(s) are +sent; history messages are text-only (this matches existing app behavior, not a limitation +specific to custom models). + +This required one additional native fix beyond `reasonWithCustomJsModel` itself: the +autonomous "take a screenshot after each command, then continue" loop +(`ScreenOperatorAccessibilityService.executeTakeScreenshotCommand`) decided whether to capture +a *real* screenshot or just text screen-info by checking the stale, native +`GenerativeAiViewModelFactory.getCurrentModel().supportsScreenshot` - which doesn't know about +the active custom model at all. It now checks `CustomModelRegistry.getActiveModel()` first. +Without this fix, a custom vision model would never receive real screenshots during +autonomous operation (only the very first, explicitly-sent message would include an image). + +## Image-*generating* models are not supported + +This only covers models that *receive* images (vision input) - not models that *produce* an +image as their response (e.g. a text-to-image / "Bildmodell" in that sense). That is not +possible today, for either custom or built-in models: + +- The request shape for image generation (e.g. an `/v1/images/generations`-style endpoint) + is fundamentally different from the chat-completions shape `window.onCustomModelRequest` + sends - there's no equivalent of "messages in, image out". 
+- There is no rendering path for it either: `addModelBubble()` in `index.html` always + HTML-escapes the model's response as plain text - there is no markdown/image rendering for + an AI's response anywhere in the app (only user-attached images get an `<img>` thumbnail). + +Adding this would need a new request/response branch in `window.onCustomModelRequest` (or a +parallel function) *and* a new way to render an image as part of a chat bubble - meaningfully +more work than a `custom-models.json` entry, not something this mechanism enables on its own. + +## Safety / scope notes + +- This only works for providers whose endpoint allows being called via `fetch()` from this + page's origin (CORS). Many AI APIs are explicitly meant to be called this way (e.g. ones + marketed for client-side/browser use); others actively block it. Test before relying on it. +- The model's API key is necessarily visible to JavaScript running in the WebView in order to + set the auth header - this is consistent with the existing bridge (`Bridge.getAllApiKeys()` + already exposes raw built-in-provider keys to JS for the key-management UI), not a new + category of exposure. +- The `endpoint` URL in `custom-models.json` is where the system message, database entries, + chat history, the current message, and (if `supportsScreenshot`) images and the API key all + get sent. Treat changes to this file with the same care as code - a malicious or compromised + `endpoint` value is a real data-exfiltration path, not just a "wrong model" bug. +- Adding a genuinely new *action/command kind* (not just a new model) is out of scope here - + see `docs/command-pattern-overrides.md` for what's possible there.
diff --git a/index.html b/index.html index 514fd860..12de0463 100644 --- a/index.html +++ b/index.html @@ -116,11 +116,13 @@ .drop-item.drop-section:hover{background:transparent} /* ── System message card ─────────────────────────────────── */ -#sys-msg-textarea{width:100%;border:1px solid var(--input-border);border-radius:6px;padding:10px 12px;font-size:13px;font-family:inherit;background:var(--input-bg);color:var(--text-primary);outline:none;resize:none;line-height:1.5;overflow-y:auto;margin-top:10px;transition:max-height .2s ease} +#chat-sys-wrap .card-accent{transition:all .3s ease;overflow:hidden} +#sys-msg-textarea{width:100%;border:1px solid var(--input-border);border-radius:6px;padding:10px 12px;font-size:13px;font-family:inherit;background:var(--input-bg);color:var(--text-primary);outline:none;resize:none;line-height:1.5;overflow-y:auto;display:block;transition:height .3s ease} #sys-msg-textarea:focus{border-color:var(--primary)} -#sys-msg-textarea.collapsed{max-height:72px} -#sys-msg-textarea.expanded-keyboard{max-height:45vh} -#sys-msg-textarea.expanded-full{max-height:80vh} + +#sys-msg-textarea.collapsed{height:120px} +#sys-msg-textarea.expanded-keyboard{height:35vh} +#sys-msg-textarea.expanded-full{height:78vh} /* ── Chat bubbles ────────────────────────────────────────── */ .bubble-wrap{display:flex;align-items:flex-start;gap:6px;margin:6px 0} @@ -432,71 +434,91 @@ Falls back to localStorage when running in a browser. ════════════════════════════════════════════════════════ */ const LS = window.localStorage; -const inAndroid = typeof window.Android !== 'undefined'; +const getInAndroid = () => typeof window.Android !== 'undefined'; const Bridge = { /* System message */ - getSystemMessage:()=> inAndroid ? Android.getSystemMessage() : (LS.getItem('sysMsg')||DEFAULT_SYSTEM_MSG), - setSystemMessage:(m)=>{ inAndroid ? Android.setSystemMessage(m) : LS.setItem('sysMsg',m); }, + getSystemMessage:()=> getInAndroid() ? 
(Android.getSystemMessage()||DEFAULT_SYSTEM_MSG) : (LS.getItem('sysMsg')||DEFAULT_SYSTEM_MSG), + setSystemMessage:(m)=>{ getInAndroid() ? Android.setSystemMessage(m) : LS.setItem('sysMsg',m); }, + restoreSystemMessage:()=>{ if(getInAndroid()) Android.setSystemMessage(DEFAULT_SYSTEM_MSG); else LS.setItem('sysMsg', DEFAULT_SYSTEM_MSG); }, + + /* Termux background */ + getTermuxBackground:()=> getInAndroid() ? Android.getTermuxBackground() : (LS.getItem('termuxBg')==='true'), + setTermuxBackground:(b)=>{ getInAndroid() ? Android.setTermuxBackground(b) : LS.setItem('termuxBg',String(b)); }, /* Model selection */ - getSelectedModelId:()=> inAndroid ? Android.getSelectedModelId() : (LS.getItem('modelId')||'PUTER_QWEN2_5_VL_72B'), - setSelectedModel:(id)=>{ inAndroid ? Android.setSelectedModel(id) : LS.setItem('modelId',id); }, + getSelectedModelId:()=> getInAndroid() ? Android.getSelectedModelId() : (LS.getItem('modelId')||'PUTER_QWEN2_5_VL_72B'), + setSelectedModel:(id)=>{ getInAndroid() ? Android.setSelectedModel(id) : LS.setItem('modelId',id); }, /* API Keys */ - getAllApiKeys:(prov)=> JSON.parse(inAndroid ? Android.getAllApiKeys(prov) : (LS.getItem('keys_'+prov)||'[]')), + getAllApiKeys:(prov)=> JSON.parse(getInAndroid() ? Android.getAllApiKeys(prov) : (LS.getItem('keys_'+prov)||'[]')), addApiKey:(key,prov)=>{ - if(inAndroid) return Android.addApiKey(key,prov); + if(getInAndroid()) return Android.addApiKey(key,prov); const k=Bridge.getAllApiKeys(prov); if(k.includes(key)) return false; k.push(key); LS.setItem('keys_'+prov,JSON.stringify(k)); return true; }, removeApiKey:(key,prov)=>{ - if(inAndroid){Android.removeApiKey(key,prov);return;} + if(getInAndroid()){Android.removeApiKey(key,prov);return;} const k=Bridge.getAllApiKeys(prov).filter(x=>x!==key); LS.setItem('keys_'+prov,JSON.stringify(k)); }, - getCurrentKeyIndex:(prov)=> inAndroid ? Android.getCurrentKeyIndex(prov) : parseInt(LS.getItem('kIdx_'+prov)||'0'), - setCurrentKeyIndex:(idx,prov)=>{ inAndroid ? 
Android.setCurrentKeyIndex(idx,prov) : LS.setItem('kIdx_'+prov,String(idx)); }, + getCurrentKeyIndex:(prov)=> getInAndroid() ? Android.getCurrentKeyIndex(prov) : parseInt(LS.getItem('kIdx_'+prov)||'0'), + setCurrentKeyIndex:(idx,prov)=>{ getInAndroid() ? Android.setCurrentKeyIndex(idx,prov) : LS.setItem('kIdx_'+prov,String(idx)); }, /* Database entries */ - getDatabaseEntries:()=> JSON.parse(inAndroid ? Android.getDatabaseEntries() : (LS.getItem('dbEntries')||'[]')), + getDatabaseEntries:()=> JSON.parse(getInAndroid() ? Android.getDatabaseEntries() : (LS.getItem('dbEntries')||'[]')), addDatabaseEntry:(title,guide)=>{ - if(inAndroid){Android.addDatabaseEntry(title,guide);return;} + if(getInAndroid()){Android.addDatabaseEntry(title,guide);return;} const e=Bridge.getDatabaseEntries(); e.push({title,guide}); LS.setItem('dbEntries',JSON.stringify(e)); }, updateDatabaseEntry:(oldTitle,title,guide)=>{ - if(inAndroid){Android.updateDatabaseEntry(oldTitle,title,guide);return;} + if(getInAndroid()){Android.updateDatabaseEntry(oldTitle,title,guide);return;} const e=Bridge.getDatabaseEntries().map(x=>x.title===oldTitle?{title,guide}:x); LS.setItem('dbEntries',JSON.stringify(e)); }, deleteDatabaseEntry:(title)=>{ - if(inAndroid){Android.deleteDatabaseEntry(title);return;} + if(getInAndroid()){Android.deleteDatabaseEntry(title);return;} const e=Bridge.getDatabaseEntries().filter(x=>x.title!==title); LS.setItem('dbEntries',JSON.stringify(e)); }, /* Generation settings */ getGenerationSettings:(id)=>{ const def={temperature:0,topP:0,topK:1}; - if(inAndroid) return JSON.parse(Android.getGenerationSettings(id)||'{}'); + if(getInAndroid()) return JSON.parse(Android.getGenerationSettings(id)||'{}'); return JSON.parse(LS.getItem('gen_'+id)||JSON.stringify(def)); }, saveGenerationSettings:(id,temp,topP,topK)=>{ - if(inAndroid){Android.saveGenerationSettings(id,temp,topP,topK);return;} + if(getInAndroid()){Android.saveGenerationSettings(id,temp,topP,topK);return;} 
LS.setItem('gen_'+id,JSON.stringify({temperature:temp,topP,topK})); }, /* Chat */ - sendMessage:(text)=>{ if(inAndroid) Android.sendMessage(text); else addModelBubble('[Demo] Processing: '+text,false); }, - clearChatHistory:()=>{ if(inAndroid) Android.clearChatHistory(); }, - stopGeneration:()=>{ if(inAndroid) Android.stopGeneration(); }, - isGenerationRunning:()=> inAndroid ? Android.isGenerationRunning() : false, - isOfflineModelLoaded:()=> inAndroid ? Android.isOfflineModelLoaded() : false, + sendMessage:(text)=>{ if(getInAndroid()) Android.sendMessage(text); else addModelBubble('[Demo] Processing: '+text,false); }, + sendMessageWithImages:(text,urisCsv)=>{ if(getInAndroid()) Android.sendMessageWithImages(text,urisCsv); else addModelBubble('[Demo] Processing with images: '+urisCsv,false); }, + pickImage:()=>{ if(getInAndroid()) Android.pickImage(); else document.getElementById('image-file-input').click(); }, + clearChatHistory:()=>{ if(getInAndroid()) Android.clearChatHistory(); }, + stopGeneration:()=>{ if(getInAndroid()) Android.stopGeneration(); }, + isGenerationRunning:()=> getInAndroid() ? Android.isGenerationRunning() : false, + isOfflineModelLoaded:()=> getInAndroid() ? Android.isOfflineModelLoaded() : false, /* Backend */ - getBackendPreference:()=> inAndroid ? Android.getBackendPreference() : (LS.getItem('backend')||'GPU'), - setBackendPreference:(b)=>{ inAndroid ? Android.setBackendPreference(b) : LS.setItem('backend',b); }, + getBackendPreference:()=> getInAndroid() ? Android.getBackendPreference() : (LS.getItem('backend')||'GPU'), + setBackendPreference:(b)=>{ getInAndroid() ? Android.setBackendPreference(b) : LS.setItem('backend',b); }, /* Donation / trial */ - initiateDonation:()=>{ if(inAndroid) Android.initiateDonation(); else alert('Subscription flow would open here'); }, - isPurchased:()=> inAndroid ? 
Android.isPurchased() : false, + initiateDonation:()=>{ if(getInAndroid()) Android.initiateDonation(); else alert('Subscription flow would open here'); }, + isPurchased:()=> getInAndroid() ? Android.isPurchased() : false, + + /* Command pattern overrides (remote-updatable command syntax for new models) */ + setCommandPatternOverrides:(json)=>{ if(getInAndroid()) return Android.setCommandPatternOverrides(json); }, + getCommandPatternOverrides:()=> getInAndroid() ? Android.getCommandPatternOverrides() : '[]', + + /* Custom models (fully JSON-defined, network call happens here in JS) */ + setCustomModelOverrides:(json)=>{ if(getInAndroid()) return Android.setCustomModelOverrides(json); }, + getCustomModelOverrides:()=> getInAndroid() ? Android.getCustomModelOverrides() : '[]', + setCustomModelApiKey:(modelId,key)=>{ if(getInAndroid()) Android.setCustomModelApiKey(modelId,key); }, + getCustomModelApiKey:(modelId)=> getInAndroid() ? Android.getCustomModelApiKey(modelId) : '', + onCustomModelPartialResponse:(text)=>{ if(getInAndroid()) Android.onCustomModelPartialResponse(text); }, + onCustomModelFinalResponse:(text)=>{ if(getInAndroid()) Android.onCustomModelFinalResponse(text); }, + onCustomModelError:(message)=>{ if(getInAndroid()) Android.onCustomModelError(message); }, }; /* ════════════════════════════════════════════════════════ @@ -590,21 +612,33 @@ // System message document.getElementById('sys-msg-textarea').value = Bridge.getSystemMessage(); + // Termux background setting + termuxBackground = Bridge.getTermuxBackground(); + document.getElementById('tb-btn').textContent = termuxBackground ? 
'TB' : 'TF'; + // Keyboard height: shift chat-bottom above keyboard using visualViewport if (window.visualViewport) { - window.visualViewport.addEventListener('resize', onViewportResize); - window.visualViewport.addEventListener('scroll', onViewportResize); + window.visualViewport.addEventListener('resize', () => { + onViewportResize(); + updateSysTextareaClass(); + }); + window.visualViewport.addEventListener('scroll', () => { + onViewportResize(); + updateSysTextareaClass(); + }); } // Back-button: listen for popstate to go back to menu window.addEventListener('popstate', (e) => { const chatActive = document.getElementById('screen-chat').classList.contains('active'); if (chatActive) { - _showMenu(); + navigateToMenu(); } }); }); + + function onViewportResize() { const vv = window.visualViewport; const chatBottom = document.getElementById('chat-bottom'); @@ -676,7 +710,8 @@ document.getElementById('screen-menu').classList.remove('active'); document.getElementById('screen-chat').classList.add('active'); // Sync system message fresh from app - document.getElementById('sys-msg-textarea').value = Bridge.getSystemMessage(); + const msg = Bridge.getSystemMessage(); + document.getElementById('sys-msg-textarea').value = msg; scrollToBottom(); } function navigateToMenu() { @@ -722,19 +757,177 @@ const ordered = [ ...MODELS.filter(m=>firstIds.has(m.id)), - ...MODELS.filter(m=>!firstIds.has(m.id)&&!vercelIds.has(m.id)&&!strikeIds.has(m.id)&&m.apiProvider!=='HUMAN_EXPERT'), + ...MODELS.filter(m=>!firstIds.has(m.id)&&!vercelIds.has(m.id)&&!strikeIds.has(m.id)&&m.apiProvider!=='HUMAN_EXPERT'&&m.apiProvider!=='CUSTOM'), ...MODELS.filter(m=>m.apiProvider==='HUMAN_EXPERT'&&!strikeIds.has(m.id)), ...MODELS.filter(m=>vercelIds.has(m.id)&&!strikeIds.has(m.id)), + ...MODELS.filter(m=>m.apiProvider==='CUSTOM'&&!strikeIds.has(m.id)), ...MODELS.filter(m=>strikeIds.has(m.id)), ]; ordered.forEach((m,i) => { if (!m.strike && ordered[i-1]?.strike) addSection('Deactivated'); if 
(m.apiProvider==='VERCEL' && !m.strike && ordered[i-1]?.apiProvider!=='VERCEL') addSection('Vercel'); + if (m.apiProvider==='CUSTOM' && !m.strike && ordered[i-1]?.apiProvider!=='CUSTOM') addSection('Custom'); addItem(m); }); } +/* ════════════════════════════════════════════════════════ + CUSTOM MODELS + Optional file "custom-models.json" next to this index.html (see docs/command-pattern- + overrides.md's sibling doc for the format). Each entry describes a genuinely new model/ + provider - id, endpoint, modelName, etc. - with no corresponding native ModelOption at + all. They show up in the model picker like any built-in model; the actual API call for + them is made right here in JS (see window.onCustomModelRequest below) once selected. +════════════════════════════════════════════════════════ */ +function mergeCustomModelsIntoDropdown(json) { + let entries; + try { + entries = JSON.parse(json); + } catch (e) { + return; + } + if (!Array.isArray(entries)) return; + + // Remove any previously merged custom entries first, so re-applying a config (e.g. after + // it changed) doesn't accumulate stale duplicates. + MODELS = MODELS.filter(m => m.apiProvider !== 'CUSTOM'); + + entries.forEach(e => { + if (!e || !e.id || !e.endpoint || !e.modelName) return; + MODELS.push({ + id: e.id, + displayName: e.displayName || e.id, + apiProvider: 'CUSTOM', + supportsTopK: !!e.supportsTopK, + supportsScreenshot: !!e.supportsScreenshot, + isOffline: false, + strike: false, + hint: 'Custom model (defined in custom-models.json)' + }); + }); + + buildModelDropdown(); + applyModelSelection(currentModelId); +} + +/** + * Called by native (PhotoReasoningViewModel.reasonWithCustomJsModel, via MainActivity) once + * per turn when the currently active model is a custom one. 
`payloadJsonString` contains + * everything needed to build the request: system message, db-entries text, sanitized chat + * history, the current user text, optional base64 image data URIs, and the model's endpoint/ + * auth/streaming config (see CustomModelDefinition on the native side). + * + * This is the actual network call for custom models: it runs as a normal `fetch()` here in + * the WebView, not in native Kotlin networking code - which is what lets a brand-new model/ + * provider be added with zero app release (as long as its endpoint is reachable via fetch() + * from this page, i.e. supports CORS - verify this per provider). + * + * Reports back via Bridge.onCustomModelPartialResponse/onCustomModelFinalResponse/ + * onCustomModelError, which feed into the exact same chat/command-processing pipeline native + * code already uses for every other model. + */ +window.onCustomModelRequest = async function(payloadJsonString) { + let payload; + try { + payload = JSON.parse(payloadJsonString); + } catch (e) { + Bridge.onCustomModelError('Invalid request payload: ' + e.message); + return; + } + + const messages = []; + const systemParts = []; + if (payload.systemMessage) systemParts.push(payload.systemMessage); + if (payload.databaseEntries) systemParts.push('Additional context from database:\n' + payload.databaseEntries); + if (systemParts.length) messages.push({ role: 'system', content: systemParts.join('\n\n') }); + + (payload.history || []).forEach(m => { + if (m && m.text) messages.push({ role: m.role === 'user' ? 
'user' : 'assistant', content: m.text }); + }); + + if (payload.images && payload.images.length > 0) { + const content = []; + if (payload.userText) content.push({ type: 'text', text: payload.userText }); + payload.images.forEach(dataUri => content.push({ type: 'image_url', image_url: { url: dataUri } })); + messages.push({ role: 'user', content: content }); + } else { + messages.push({ role: 'user', content: payload.userText || '' }); + } + + const headers = { 'Content-Type': 'application/json' }; + if (payload.apiKeyHeader && payload.apiKey) { + headers[payload.apiKeyHeader] = (payload.apiKeyPrefix || '') + payload.apiKey; + } + + const requestBody = JSON.stringify({ + model: payload.modelName, + messages: messages, + stream: !!payload.stream, + temperature: payload.temperature, + top_p: payload.topP, + ...(payload.topK !== undefined ? { top_k: payload.topK } : {}) + }); + + window.__customModelAbortController = new AbortController(); + let acc = ''; + + try { + const response = await fetch(payload.endpoint, { + method: 'POST', + headers: headers, + body: requestBody, + signal: window.__customModelAbortController.signal + }); + + if (!response.ok) { + let detail = ''; + try { detail = (await response.text()).slice(0, 300); } catch (e) {} + throw new Error('HTTP ' + response.status + (detail ? 
(': ' + detail) : '')); + } + + if (payload.stream && response.body) { + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop(); + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed.startsWith('data:')) continue; + const data = trimmed.slice(5).trim(); + if (data === '[DONE]' || data === '') continue; + try { + const json = JSON.parse(data); + const delta = json.choices && json.choices[0] && json.choices[0].delta && json.choices[0].delta.content; + if (delta) { + acc += delta; + Bridge.onCustomModelPartialResponse(acc); + } + } catch (e) { /* ignore malformed SSE chunk */ } + } + } + } else { + const json = await response.json(); + acc = (json.choices && json.choices[0] && json.choices[0].message && json.choices[0].message.content) || ''; + } + + Bridge.onCustomModelFinalResponse(acc); + } catch (e) { + if (e && e.name === 'AbortError') { + Bridge.onCustomModelFinalResponse(acc + (acc ? '\n\n' : '') + '[stopped by user]'); + } else { + Bridge.onCustomModelError(e && e.message ? 
e.message : String(e)); + } + } finally { + window.__customModelAbortController = null; + } +}; + function toggleModelDropdown() { const menu = document.getElementById('model-dropdown-menu'); menu.classList.toggle('open'); @@ -827,9 +1020,10 @@ } /* ════════════════════════════════════════════════════════ - SYSTEM MESSAGE + SYSTEM MESSAGE & KEYBOARD / VIEWPORT SYNC ════════════════════════════════════════════════════════ */ let sysTimeout = null; + function onSystemMessageChange() { clearTimeout(sysTimeout); autoGrowTextarea(document.getElementById('sys-msg-textarea')); @@ -837,26 +1031,103 @@ Bridge.setSystemMessage(document.getElementById('sys-msg-textarea').value); }, 600); } -function onSysTextareaFocus() { + +function updateSysTextareaClass() { const ta = document.getElementById('sys-msg-textarea'); - const vv = window.visualViewport; - const keyH = vv ? (window.innerHeight - vv.height - vv.offsetTop) : 0; - ta.className = keyH > 10 ? 'expanded-keyboard' : 'expanded-full'; - autoGrowTextarea(ta); + if (document.activeElement !== ta) return; + const isKeyboard = window.visualViewport && (window.visualViewport.height < window.innerHeight * 0.88); + ta.className = isKeyboard ? 
'expanded-keyboard' : 'expanded-full'; +} + +function onSysTextareaFocus() { + updateSysTextareaClass(); + autoGrowTextarea(document.getElementById('sys-msg-textarea')); } + function onSysTextareaBlur() { const ta = document.getElementById('sys-msg-textarea'); - ta.className = 'collapsed'; + setTimeout(() => { + if (document.activeElement !== ta) { + ta.className = 'collapsed'; + } + }, 150); } + function restoreSystemMessage() { - const ta = document.getElementById('sys-msg-textarea'); - ta.value = DEFAULT_SYSTEM_MSG; - Bridge.setSystemMessage(DEFAULT_SYSTEM_MSG); - autoGrowTextarea(ta); + Bridge.restoreSystemMessage(); } + // Android can call this when system message changes externally window.onSystemMessageChanged = function(msg) { - document.getElementById('sys-msg-textarea').value = msg; + const ta = document.getElementById('sys-msg-textarea'); + if (document.activeElement !== ta) { + ta.value = msg; + autoGrowTextarea(ta); + } +}; + +window.onTrialStateChanged = function(isExpired, isPurchased, message) { + // Refresh the donation card (show/hide the Pro button). + updateDonationCard(); +}; + +window.onAndroidReady = function() { + currentModelId = Bridge.getSelectedModelId(); + buildModelDropdown(); + applyModelSelection(currentModelId); + loadGenerationSettings(); + loadBackendPreference(); + updateDonationCard(); + + // System message + document.getElementById('sys-msg-textarea').value = Bridge.getSystemMessage(); + + // Termux background setting + termuxBackground = Bridge.getTermuxBackground(); + document.getElementById('tb-btn').textContent = termuxBackground ? 'TB' : 'TF'; + + // ── Remote config (see docs/command-pattern-overrides.md) ────────────────── + // Both files are optional next to this index.html, fetched relative to the WebView's + // base URL. Missing/invalid files are ignored - the app falls back to built-in behavior. + + // New/alternate command syntax for existing actions (e.g. 
a new model emits + // "Click('...')" instead of "click(\"...\")") - no app update needed to support it. + fetch('command-patterns.json', { cache: 'no-store' }) + .then(r => r.ok ? r.text() : null) + .then(json => { if (json) Bridge.setCommandPatternOverrides(json); }) + .catch(() => { /* optional file */ }); + + // Genuinely new models/providers, defined entirely in JSON. The actual API call for + // these happens in window.onCustomModelRequest() below (fetch()), not in native code. + fetch('custom-models.json', { cache: 'no-store' }) + .then(r => r.ok ? r.text() : null) + .then(json => { + if (!json) return; + Bridge.setCustomModelOverrides(json); + mergeCustomModelsIntoDropdown(json); + }) + .catch(() => { /* optional file */ }); +}; + +// Android System Back Button Handler (returns Boolean to WebViewBridge) +window.onBackPressed = function() { + if (document.getElementById('popup-edit-entry').classList.contains('open')) { + closePopup('popup-edit-entry'); + return true; + } + if (document.getElementById('popup-db').classList.contains('open')) { + closePopup('popup-db'); + return true; + } + if (document.getElementById('popup-apikey').classList.contains('open')) { + closePopup('popup-apikey'); + return true; + } + if (document.getElementById('screen-chat').classList.contains('active')) { + navigateToMenu(); + return true; + } + return false; }; /* ════════════════════════════════════════════════════════ @@ -900,6 +1171,10 @@ } function stopGeneration() { Bridge.stopGeneration(); + // Custom (JS-driven) models have no native network call to cancel - abort the fetch() here. + if (window.__customModelAbortController) { + window.__customModelAbortController.abort(); + } window.onGenerationStateChanged(false, false); } @@ -912,17 +1187,20 @@ const val = ta.value.trim(); const icon = document.getElementById('send-icon'); icon.className = val ? 'send-icon active' : 'send-icon'; + + if (document.activeElement === ta) { + ta.placeholder = val ? 
'Task' : 'Describe step by step if complicated tasks have to be solved'; + } else { + ta.placeholder = 'Task'; + } + autoGrowTextarea(ta); } function onTaskInputFocus() { - const ta = document.getElementById('task-input'); - if (!ta.value.trim()) { - ta.placeholder = 'Describe step by step if complicated tasks have to be solved'; - } + onTaskInput(); } function onTaskInputBlur() { - const ta = document.getElementById('task-input'); - ta.placeholder = 'Task'; + onTaskInput(); } function onTaskKey(e) { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); sendMessage(); } @@ -930,15 +1208,15 @@ function sendMessage() { const input = document.getElementById('task-input'); const text = input.value.trim(); - if (!text) return; - addUserBubble(text); + if (!text && selectedImages.length === 0) return; + addUserBubble(text || "(attached images)"); input.value = ''; input.style.height = ''; onTaskInput(); // Pass image URIs to Android if any - if (inAndroid && Android.sendMessageWithImages && selectedImages.length > 0) { - const uris = selectedImages.map(img => img.uri || img.objectUrl).join(','); - Android.sendMessageWithImages(text, uris); + if (selectedImages.length > 0) { + const urisCsv = selectedImages.map(img => img.uri || img.objectUrl).join(','); + Bridge.sendMessageWithImages(text, urisCsv); } else { Bridge.sendMessage(text); } @@ -954,26 +1232,23 @@ let selectedImages = []; function pickImage() { - if (inAndroid && Android.pickImage) { - Android.pickImage(); - } else { - document.getElementById('image-file-input').click(); - } + Bridge.pickImage(); } function onFilesPicked(event) { const files = Array.from(event.target.files); files.forEach(file => { const url = URL.createObjectURL(file); - selectedImages.push({ objectUrl: url, name: file.name }); + const isVideo = file.type.startsWith('video/'); + selectedImages.push({ objectUrl: url, name: file.name, isVideo: isVideo }); }); event.target.value = ''; renderImagePreviews(); } -// Called by Android after 
user picks a media item: window.onImagePicked(uri) -window.onImagePicked = function(uri) { - selectedImages.push({ uri: uri, name: uri.split('/').pop() }); +// Called by Android after user picks a media item: window.onImagePicked(uri, isVideo) +window.onImagePicked = function(uri, isVideo) { + selectedImages.push({ uri: uri, name: uri.split('/').pop(), isVideo: !!isVideo }); renderImagePreviews(); }; @@ -983,9 +1258,15 @@ selectedImages.forEach((img, i) => { const wrap = document.createElement('div'); wrap.className = 'img-thumb-wrap'; - const src = img.objectUrl || img.uri; - wrap.innerHTML = `${escAttr(img.name)} - `; + if (img.isVideo) { + wrap.innerHTML = ` +
🎬
+ `; + } else { + const src = img.objectUrl || img.uri; + wrap.innerHTML = `${escAttr(img.name)} + `; + } row.appendChild(wrap); }); } @@ -1009,11 +1290,7 @@ function toggleTermuxMode() { termuxBackground = !termuxBackground; document.getElementById('tb-btn').textContent = termuxBackground ? 'TB' : 'TF'; - if (inAndroid && Android.setTermuxBackground) Android.setTermuxBackground(termuxBackground); - const msg = termuxBackground - ? 'Termux commands are executed in the background' - : 'Termux commands are executed in the foreground'; - showToast(msg); + Bridge.setTermuxBackground(termuxBackground); } /* ════════════════════════════════════════════════════════ @@ -1094,6 +1371,7 @@ function renderDbList() { const list = document.getElementById('db-list'); const entries = Bridge.getDatabaseEntries(); + list.innerHTML = ''; if (!entries.length) { list.innerHTML='
No entries yet. Tap "+ New Entry" to add one.
'; @@ -1102,12 +1380,27 @@ entries.forEach(e => { const row = document.createElement('div'); row.className='db-entry'; - row.innerHTML=`${escHtml(e.title)} - `; - row.addEventListener('click', () => openEditEntry(e)); + row.innerHTML=` +
+
${escHtml(e.title)}
+
${escHtml(e.guide.substring(0,60))}...
+
+
+ + +
`; + row.querySelector('div').onclick = () => openEditEntry(e); list.appendChild(row); }); } + +function copyToSystemMessage(text) { + const ta = document.getElementById('sys-msg-textarea'); + ta.value = text; + Bridge.setSystemMessage(text); + autoGrowTextarea(ta); + closePopup('popup-db'); +} function deleteEntry(title) { if (!confirm('Delete "'+title+'"?')) return; Bridge.deleteDatabaseEntry(title); @@ -1127,7 +1420,8 @@ if (editingEntryOriginalTitle) { Bridge.updateDatabaseEntry(editingEntryOriginalTitle, title, guide); } else { - const exists = Bridge.getDatabaseEntries().some(e=>e.title.toLowerCase()===title.toLowerCase()); + const entries = Bridge.getDatabaseEntries(); + const exists = entries.some(e=>e.title.toLowerCase()===title.toLowerCase()); if (exists) { alert('An entry with this title already exists.'); return; } Bridge.addDatabaseEntry(title, guide); }