From 19323cb2750989896380835e003c18bd2f90162e Mon Sep 17 00:00:00 2001 From: tprevot Date: Wed, 25 Feb 2026 18:44:40 +0100 Subject: [PATCH 1/2] feat(search): add list fields endpoint + pagination --- .../protos/quickwit/search.proto | 5 + .../src/codegen/quickwit/quickwit.search.rs | 10 +- quickwit/quickwit-search/src/list_fields.rs | 380 +++++++++++++----- .../model/field_capability.rs | 1 + quickwit/quickwit-serve/src/rest.rs | 4 +- quickwit/quickwit-serve/src/search_api/mod.rs | 5 +- .../src/search_api/rest_handler.rs | 97 ++++- 7 files changed, 400 insertions(+), 102 deletions(-) diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto index ae3442fe1aa..4be9db1f436 100644 --- a/quickwit/quickwit-proto/protos/quickwit/search.proto +++ b/quickwit/quickwit-proto/protos/quickwit/search.proto @@ -125,6 +125,10 @@ message ListFieldsRequest { optional int64 start_timestamp = 3; optional int64 end_timestamp = 4; + uint64 max_hits = 5; + uint64 start_offset = 6; + repeated SortField sort_fields = 7; + // Control if the request will fail if split_ids contains a split that does not exist. 
// optional bool fail_on_missing_index = 6; } @@ -146,6 +150,7 @@ message LeafListFieldsRequest { message ListFieldsResponse { repeated ListFieldsEntryResponse fields = 1; + uint64 num_fields = 2; } message ListFieldsEntryResponse { diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index 1e933055cd3..0f759a3e39a 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -54,7 +54,7 @@ pub struct ReportSplitsRequest { #[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] pub struct ReportSplitsResponse {} #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct ListFieldsRequest { /// Index ID patterns #[prost(string, repeated, tag = "1")] @@ -70,6 +70,12 @@ pub struct ListFieldsRequest { pub start_timestamp: ::core::option::Option, #[prost(int64, optional, tag = "4")] pub end_timestamp: ::core::option::Option, + #[prost(uint64, tag = "5")] + pub max_fields: u64, + #[prost(uint64, tag = "6")] + pub start_offset: u64, + #[prost(message, repeated, tag = "7")] + pub sort_fields: ::prost::alloc::vec::Vec, } #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -94,6 +100,8 @@ pub struct LeafListFieldsRequest { pub struct ListFieldsResponse { #[prost(message, repeated, tag = "1")] pub fields: ::prost::alloc::vec::Vec, + #[prost(uint64, tag = "2")] + pub num_fields: u64, } #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] diff --git a/quickwit/quickwit-search/src/list_fields.rs b/quickwit/quickwit-search/src/list_fields.rs index f4cf173fe08..1514cd0254d 100644 --- a/quickwit/quickwit-search/src/list_fields.rs +++ 
b/quickwit/quickwit-search/src/list_fields.rs @@ -224,48 +224,61 @@ fn merge_same_field_group( } } -/// Merge iterators of ListFieldsEntryResponse into a `Vec`. -/// -/// The iterators need to be sorted by (field_name, fieldtype) -fn merge_leaf_list_fields( - iterators: Vec>, -) -> crate::Result> { - let merged = iterators - .into_iter() - .kmerge_by(|a, b| (&a.field_name, a.field_type) <= (&b.field_name, b.field_type)); - let mut responses = Vec::new(); - - let mut current_group: Vec = Vec::new(); - // Build ListFieldsEntryResponse from current group - let flush_group = |responses: &mut Vec<_>, current_group: &mut Vec| { - let entry = merge_same_field_group(current_group); - responses.push(entry); - current_group.clear(); - }; +struct ListFieldMerger> { + merged: itertools::KMergeBy bool>, + current_group: Vec, +} - for entry in merged { - if let Some(last) = current_group.last() - && (last.field_name != entry.field_name || last.field_type != entry.field_type) - { - flush_group(&mut responses, &mut current_group); - } - if responses.len() >= *FIELD_LIST_SIZE_LIMIT { - return Err(SearchError::Internal(format!( - "list fields response exceeded {} fields", - *FIELD_LIST_SIZE_LIMIT - ))); +impl> ListFieldMerger { + fn new(iterators: impl Iterator) -> Self { + //TODO: sort + let cmp_fn: fn(&ListFieldsEntryResponse, &ListFieldsEntryResponse) -> bool = + |a, b| (&a.field_name, a.field_type) <= (&b.field_name, b.field_type); + + let merged = iterators.kmerge_by(cmp_fn); + Self { + merged, + current_group: Vec::new(), } - current_group.push(entry); - } - if !current_group.is_empty() { - flush_group(&mut responses, &mut current_group); } +} - Ok(responses) +impl> Iterator for ListFieldMerger { + type Item = ListFieldsEntryResponse; + + fn next(&mut self) -> Option { + loop { + match self.merged.next() { + Some(entry) => { + if let Some(last) = self.current_group.last() + && (last.field_name != entry.field_name + || last.field_type != entry.field_type) + { + let result = 
merge_same_field_group(&mut self.current_group); + self.current_group.clear(); + self.current_group.push(entry); + return Some(result); + } + self.current_group.push(entry); + } + None => { + if !self.current_group.is_empty() { + let result = merge_same_field_group(&mut self.current_group); + self.current_group.clear(); + return Some(result); + } + return None; + } + } + } + } } // Returns true if any of the patterns match the field name. fn matches_any_pattern(field_name: &str, field_patterns: &[FieldPattern]) -> bool { + if field_patterns.is_empty() { + return true; + } field_patterns .iter() .any(|pattern| pattern.matches(field_name)) @@ -356,23 +369,22 @@ pub async fn leaf_list_fields( } } - let filtered_list_fields_sorted_iters: Vec<_> = single_split_list_fields_vec - .into_iter() - .map(|list_fields_sorted| { - list_fields_sorted.into_iter().filter(|field| { - if field_patterns.is_empty() { - true - } else { - matches_any_pattern(&field.field_name, &field_patterns) - } - }) - }) - .collect(); - merge_leaf_list_fields(filtered_list_fields_sorted_iters) + let filtered_list_fields_sorted_iters = + single_split_list_fields_vec + .into_iter() + .map(|list_fields_sorted| { + list_fields_sorted + .into_iter() + .filter(|field| matches_any_pattern(&field.field_name, &field_patterns)) + }); + + ListFieldMerger::new(filtered_list_fields_sorted_iters).collect::>() }) .await - .context("failed to merge single split list fields")??; - Ok(ListFieldsResponse { fields }) + .context("failed to merge single split list fields")?; + + let num_fields = fields.len() as u64; + Ok(ListFieldsResponse { fields, num_fields }) } /// Index metas needed for executing a leaf search request. @@ -397,8 +409,9 @@ pub async fn root_list_fields( resolve_index_patterns(&list_fields_req.index_id_patterns[..], &mut metastore).await?; // The request contains a wildcard, but couldn't find any index. 
if indexes_metadata.is_empty() { - return Ok(ListFieldsResponse { fields: Vec::new() }); + return Ok(ListFieldsResponse::default()); } + let index_uid_to_index_meta: HashMap = indexes_metadata .iter() .map(|index_metadata| { @@ -442,18 +455,31 @@ pub async fn root_list_fields( } } let leaf_list_fields_protos: Vec = try_join_all(leaf_request_tasks).await?; - let fields = search_thread_pool() + let (fields, num_fields) = search_thread_pool() .run_cpu_intensive(move || { - let leaf_list_fields = leaf_list_fields_protos + let fields_iter = leaf_list_fields_protos .into_iter() - .map(|leaf_list_fields_proto| leaf_list_fields_proto.fields.into_iter()) - .collect(); - merge_leaf_list_fields(leaf_list_fields) + .map(|leaf_list_fields_proto| leaf_list_fields_proto.fields.into_iter()); + let mut fields_iter = ListFieldMerger::new(fields_iter); + let skipped = fields_iter + .by_ref() + .take(list_fields_req.start_offset as usize) + .count(); + + let fields = fields_iter + .by_ref() + .take(list_fields_req.max_fields as usize) + .collect::>(); + + let remaining = fields_iter.count(); + let num_fields = (skipped + fields.len() + remaining) as u64; + + (fields, num_fields) }) .await - .context("failed to merge leaf list fields responses")??; + .context("failed to merge leaf list fields responses")?; - Ok(ListFieldsResponse { fields }) + Ok(ListFieldsResponse { fields, num_fields }) } /// Builds a list of [`LeafListFieldsRequest`], one per index, from a list of [`SearchJob`]. 
@@ -512,11 +538,14 @@ mod tests { non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index1".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone()].into_iter(), - vec![entry2.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone()].into_iter(), + vec![entry2.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); assert_eq!(resp, vec![entry1]); } #[test] @@ -539,11 +568,14 @@ mod tests { non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index1".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone()].into_iter(), - vec![entry2.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone()].into_iter(), + vec![entry2.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); assert_eq!(resp, vec![entry1, entry2]); } #[test] @@ -566,11 +598,14 @@ mod tests { non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index2".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone()].into_iter(), - vec![entry2.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone()].into_iter(), + vec![entry2.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); let expected = ListFieldsEntryResponse { field_name: "field1".to_string(), field_type: ListFieldType::Str as i32, @@ -602,11 +637,14 @@ mod tests { non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index2".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone()].into_iter(), - vec![entry2.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone()].into_iter(), + vec![entry2.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); let expected = ListFieldsEntryResponse { field_name: "field1".to_string(), field_type: ListFieldType::Str as i32, @@ -647,11 +685,14 @@ mod tests 
{ non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index1".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone(), entry2.clone()].into_iter(), - vec![entry3.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone(), entry2.clone()].into_iter(), + vec![entry3.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); assert_eq!(resp, vec![entry1.clone(), entry3.clone()]); } #[test] @@ -683,11 +724,14 @@ mod tests { non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index1".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone(), entry3.clone()].into_iter(), - vec![entry2.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone(), entry3.clone()].into_iter(), + vec![entry2.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); assert_eq!(resp, vec![entry1.clone(), entry3.clone()]); } #[test] @@ -719,11 +763,14 @@ mod tests { non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index1".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone(), entry3.clone()].into_iter(), - vec![entry2.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone(), entry3.clone()].into_iter(), + vec![entry2.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); assert_eq!(resp, vec![entry1.clone(), entry3.clone()]); } #[test] @@ -750,11 +797,15 @@ mod tests { non_aggregatable_index_ids: Vec::new(), index_ids: vec!["index4".to_string()], }; - let resp = merge_leaf_list_fields(vec![ - vec![entry1.clone()].into_iter(), - vec![entry2.clone()].into_iter(), - ]) - .unwrap(); + let resp: Vec<_> = ListFieldMerger::new( + vec![ + vec![entry1.clone()].into_iter(), + vec![entry2.clone()].into_iter(), + ] + .into_iter(), + ) + .collect::>(); + let expected = ListFieldsEntryResponse { field_name: "field1".to_string(), field_type: 
ListFieldType::Str as i32, @@ -772,6 +823,141 @@ mod tests { assert_eq!(resp, vec![expected]); } + fn make_entry( + field_name: &str, + field_type: ListFieldType, + index_id: &str, + ) -> ListFieldsEntryResponse { + ListFieldsEntryResponse { + field_name: field_name.to_string(), + field_type: field_type as i32, + searchable: true, + aggregatable: true, + non_searchable_index_ids: Vec::new(), + non_aggregatable_index_ids: Vec::new(), + index_ids: vec![index_id.to_string()], + } + } + + #[test] + fn merge_iter_skip_take_basic_pagination() { + // 4 distinct fields spread across 2 leaves, paginate with skip=1, take=2 + let leaf1 = vec![ + make_entry("aaa", ListFieldType::Str, "idx1"), + make_entry("ccc", ListFieldType::Str, "idx1"), + ]; + let leaf2 = vec![ + make_entry("bbb", ListFieldType::Str, "idx2"), + make_entry("ddd", ListFieldType::Str, "idx2"), + ]; + + let all: Vec<_> = ListFieldMerger::new( + vec![leaf1.clone().into_iter(), leaf2.clone().into_iter()].into_iter(), + ) + .collect::>(); + assert_eq!(all.len(), 4); + assert_eq!(all[0].field_name, "aaa"); + assert_eq!(all[1].field_name, "bbb"); + assert_eq!(all[2].field_name, "ccc"); + assert_eq!(all[3].field_name, "ddd"); + + // Page 2: skip 1, take 2 → ["bbb", "ccc"] + let page: Vec<_> = ListFieldMerger::new( + vec![leaf1.clone().into_iter(), leaf2.clone().into_iter()].into_iter(), + ) + .skip(1) + .take(2) + .collect::>(); + assert_eq!(page.len(), 2); + assert_eq!(page[0].field_name, "bbb"); + assert_eq!(page[1].field_name, "ccc"); + } + + #[test] + fn merge_iter_skip_take_with_grouping() { + // Same field appears in multiple leaves — grouping must happen before pagination + let leaf1 = vec![ + make_entry("aaa", ListFieldType::Str, "idx1"), + make_entry("bbb", ListFieldType::Str, "idx1"), + ]; + let leaf2 = vec![ + make_entry("aaa", ListFieldType::Str, "idx2"), + make_entry("ccc", ListFieldType::Str, "idx2"), + ]; + + // Without pagination: 3 merged fields [aaa(idx1+idx2), bbb, ccc] + let all: Vec<_> = 
ListFieldMerger::new( + vec![leaf1.clone().into_iter(), leaf2.clone().into_iter()].into_iter(), + ) + .collect::>(); + assert_eq!(all.len(), 3); + assert_eq!(all[0].field_name, "aaa"); + assert_eq!( + all[0].index_ids, + vec!["idx1".to_string(), "idx2".to_string()] + ); + assert_eq!(all[1].field_name, "bbb"); + assert_eq!(all[2].field_name, "ccc"); + + // skip=1, take=1 → ["bbb"] (skips the merged "aaa") + let page: Vec<_> = ListFieldMerger::new( + vec![leaf1.clone().into_iter(), leaf2.clone().into_iter()].into_iter(), + ) + .skip(1) + .take(1) + .collect::>(); + assert_eq!(page.len(), 1); + assert_eq!(page[0].field_name, "bbb"); + } + + #[test] + fn merge_iter_skip_beyond_end() { + let leaf1 = vec![make_entry("aaa", ListFieldType::Str, "idx1")]; + let page: Vec<_> = ListFieldMerger::new(vec![leaf1.into_iter()].into_iter()) + .skip(10) + .take(5) + .collect::>(); + assert!(page.is_empty()); + } + + #[test] + fn merge_iter_take_more_than_available() { + let leaf1 = vec![ + make_entry("aaa", ListFieldType::Str, "idx1"), + make_entry("bbb", ListFieldType::Str, "idx1"), + ]; + let page: Vec<_> = ListFieldMerger::new(vec![leaf1.into_iter()].into_iter()) + .take(100) + .collect::>(); + assert_eq!(page.len(), 2); + } + + #[test] + fn merge_iter_pagination_with_mixed_types() { + // Same field name but different types → treated as separate entries + let leaf1 = vec![ + make_entry("field", ListFieldType::Str, "idx1"), + make_entry("field", ListFieldType::U64, "idx1"), + ]; + let leaf2 = vec![make_entry("field", ListFieldType::Str, "idx2")]; + + // 2 merged entries: field/Str(idx1+idx2), field/U64(idx1) + let all: Vec<_> = ListFieldMerger::new( + vec![leaf1.clone().into_iter(), leaf2.clone().into_iter()].into_iter(), + ) + .collect::>(); + assert_eq!(all.len(), 2); + + // skip=1 → only field/U64 + let page: Vec<_> = + ListFieldMerger::new(vec![leaf1.into_iter(), leaf2.into_iter()].into_iter()) + .skip(1) + .take(10) + .collect::>(); + assert_eq!(page.len(), 1); + 
assert_eq!(page[0].field_type, ListFieldType::U64 as i32); + } + #[test] fn test_field_pattern() { let prefix_pattern = FieldPattern::from_str("toto*").unwrap(); diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/field_capability.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/field_capability.rs index a382c541dc7..68dd202f70d 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/field_capability.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/field_capability.rs @@ -184,5 +184,6 @@ pub fn build_list_field_request_for_es_api( fields: search_params.fields.unwrap_or_default(), start_timestamp: search_params.start_timestamp, end_timestamp: search_params.end_timestamp, + ..Default::default() }) } diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 3f193783b04..d5cb8d8e0cb 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -51,7 +51,8 @@ use crate::node_info_handler::node_info_handler; use crate::otlp_api::otlp_ingest_api_handlers; use crate::rest_api_response::{RestApiError, RestApiResponse}; use crate::search_api::{ - search_get_handler, search_plan_get_handler, search_plan_post_handler, search_post_handler, + list_fields_handler, search_get_handler, search_plan_get_handler, search_plan_post_handler, + search_post_handler, }; use crate::template_api::index_template_api_handlers; use crate::ui_handler::ui_handler; @@ -283,6 +284,7 @@ fn search_routes( .or(search_post_handler(search_service.clone())) .or(search_plan_get_handler(search_service.clone())) .or(search_plan_post_handler(search_service.clone())) + .or(list_fields_handler(search_service.clone())) .recover(recover_fn) .boxed() } diff --git a/quickwit/quickwit-serve/src/search_api/mod.rs b/quickwit/quickwit-serve/src/search_api/mod.rs index ef1f643257c..61973a894db 100644 --- a/quickwit/quickwit-serve/src/search_api/mod.rs +++ b/quickwit/quickwit-serve/src/search_api/mod.rs @@ -17,7 
+17,8 @@ mod rest_handler; pub use self::grpc_adapter::GrpcSearchAdapter; pub use self::rest_handler::{ - SearchApi, SearchRequestQueryString, SortBy, search_get_handler, search_plan_get_handler, - search_plan_post_handler, search_post_handler, search_request_from_api_request, + SearchApi, SearchRequestQueryString, SortBy, list_fields_handler, search_get_handler, + search_plan_get_handler, search_plan_post_handler, search_post_handler, + search_request_from_api_request, }; pub(crate) use self::rest_handler::{extract_index_id_patterns, extract_index_id_patterns_default}; diff --git a/quickwit/quickwit-serve/src/search_api/rest_handler.rs b/quickwit/quickwit-serve/src/search_api/rest_handler.rs index b1400fa12c0..3d149143b2d 100644 --- a/quickwit/quickwit-serve/src/search_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/search_api/rest_handler.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use percent_encoding::percent_decode_str; use quickwit_config::validate_index_id_pattern; -use quickwit_proto::search::{CountHits, SortField, SortOrder}; +use quickwit_proto::search::{CountHits, ListFieldsRequest, SortField, SortOrder}; use quickwit_query::query_ast::query_ast_from_user_text; use quickwit_search::{SearchError, SearchPlanResponseRest, SearchResponseRest, SearchService}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -36,12 +36,14 @@ use crate::{BodyFormat, with_arg}; search_post_handler, search_plan_get_handler, search_plan_post_handler, + list_fields_handler, ), components(schemas( BodyFormat, SearchRequestQueryString, SearchResponseRest, SearchPlanResponseRest, + ListFieldsQueryString, SortBy, SortField, SortOrder, @@ -240,6 +242,48 @@ mod count_hits_from_bool { } } +/// This struct represents the QueryString passed to +/// the rest API. 
+#[derive( + Debug, Default, Eq, PartialEq, Serialize, Deserialize, utoipa::IntoParams, utoipa::ToSchema, +)] +#[into_params(parameter_in = Query)] +#[serde(deny_unknown_fields)] +pub struct ListFieldsQueryString { + /// Field names to filter on. It can be a comma-separated list of field names. If empty, all + /// fields are returned. It also supports wildcards. + #[serde(default)] + pub fields: String, + /// If set, restrict search to documents with a `timestamp >= start_timestamp`. + /// This timestamp is expressed in seconds. + #[serde(skip_serializing_if = "Option::is_none")] + pub start_timestamp: Option<i64>, + /// If set, restrict search to documents with a `timestamp < end_timestamp`. + /// This timestamp is expressed in seconds. + #[serde(skip_serializing_if = "Option::is_none")] + pub end_timestamp: Option<i64>, + /// Maximum number of fields to return (by default 20). + #[serde(default = "default_max_hits")] + pub max_fields: u64, + /// First field to return. Together with max_fields, this parameter + /// can be used for pagination. + /// + /// E.g. + /// The results with rank [start_offset..start_offset + max_fields) are returned + #[serde(default)] // Default to 0. (We are 0-indexed) + pub start_offset: u64, + /// The output format. + #[serde(default)] + pub format: BodyFormat, + /// Specifies how documents are sorted. 
+ #[serde(alias = "sort_by_field")] + #[serde(deserialize_with = "sort_by_mini_dsl")] + #[serde(default)] + #[serde(skip_serializing_if = "SortBy::is_empty")] + #[param(value_type = String)] + pub sort_by: SortBy, +} + pub fn search_request_from_api_request( index_id_patterns: Vec, search_request: SearchRequestQueryString, @@ -328,6 +372,14 @@ fn search_plan_post_filter() .and(warp::body::json()) } +fn list_fields_filter() +-> impl Filter, ListFieldsQueryString), Error = Rejection> + Clone { + warp::path!(String / "list-fields") + .and_then(extract_index_id_patterns) + .and(warp::get()) + .and(warp::query()) +} + async fn search( index_id_patterns: Vec, search_request: SearchRequestQueryString, @@ -355,6 +407,28 @@ async fn search_plan( into_rest_api_response(result, body_format) } +async fn list_fields( + index_id_patterns: Vec, + request: ListFieldsQueryString, + search_service: Arc, +) -> impl warp::Reply { + let req = ListFieldsRequest { + end_timestamp: request.end_timestamp, + start_timestamp: request.start_timestamp, + fields: if !request.fields.is_empty() { + request.fields.split(',').map(String::from).collect() + } else { + Default::default() + }, + index_id_patterns, + max_fields: request.max_fields, + sort_fields: request.sort_by.sort_fields, + start_offset: request.start_offset, + }; + let result = search_service.root_list_fields(req).await; + into_rest_api_response(result, BodyFormat::Json) +} + #[utoipa::path( get, tag = "Search", @@ -449,6 +523,27 @@ pub fn search_plan_post_handler( .then(search_plan) } +#[utoipa::path( + get, + tag = "Search", + path = "/{index_id}/list-fields", + responses( + (status = 200, description = "Successfully retrieved list of fields.", body = [String]) + ), + params( + ("index_id" = String, Path, description = "The index ID to search."), + ListFieldsQueryString + ) +)] +/// List Fields +pub fn list_fields_handler( + search_service: Arc, +) -> impl Filter + Clone { + list_fields_filter() + .and(with_arg(search_service)) + 
.then(list_fields) +} + #[cfg(test)] mod tests { use assert_json_diff::{assert_json_eq, assert_json_include}; From 1288bfb5d5f495ad037c57d967762e56265213c3 Mon Sep 17 00:00:00 2001 From: tprevot Date: Thu, 26 Feb 2026 11:43:19 +0100 Subject: [PATCH 2/2] fix: minor things --- .../protos/quickwit/search.proto | 3 +-- .../src/codegen/quickwit/quickwit.search.rs | 4 +--- quickwit/quickwit-search/src/list_fields.rs | 19 ++++--------------- .../src/search_api/rest_handler.rs | 8 -------- 4 files changed, 6 insertions(+), 28 deletions(-) diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto index 4be9db1f436..999c9236484 100644 --- a/quickwit/quickwit-proto/protos/quickwit/search.proto +++ b/quickwit/quickwit-proto/protos/quickwit/search.proto @@ -125,9 +125,8 @@ message ListFieldsRequest { optional int64 start_timestamp = 3; optional int64 end_timestamp = 4; - uint64 max_hits = 5; + uint64 max_fields = 5; uint64 start_offset = 6; - repeated SortField sort_fields = 7; // Control if the request will fail if split_ids contains a split that does not exist. 
// optional bool fail_on_missing_index = 6; diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index 0f759a3e39a..ea3b1b9dac1 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -54,7 +54,7 @@ pub struct ReportSplitsRequest { #[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] pub struct ReportSplitsResponse {} #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct ListFieldsRequest { /// Index ID patterns #[prost(string, repeated, tag = "1")] @@ -74,8 +74,6 @@ pub struct ListFieldsRequest { pub max_fields: u64, #[prost(uint64, tag = "6")] pub start_offset: u64, - #[prost(message, repeated, tag = "7")] - pub sort_fields: ::prost::alloc::vec::Vec, } #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/quickwit/quickwit-search/src/list_fields.rs b/quickwit/quickwit-search/src/list_fields.rs index 1514cd0254d..0dbc42256c0 100644 --- a/quickwit/quickwit-search/src/list_fields.rs +++ b/quickwit/quickwit-search/src/list_fields.rs @@ -15,7 +15,7 @@ use std::collections::{HashMap, HashSet}; use std::path::Path; use std::str::FromStr; -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; use anyhow::Context; use futures::future; @@ -41,16 +41,6 @@ use crate::{ search_thread_pool, }; -/// QW_FIELD_LIST_SIZE_LIMIT defines a hard limit on the number of fields that -/// can be returned (error otherwise). -/// -/// Having many fields can happen when a user is creating fields dynamically in -/// a JSON type with random field names. This leads to huge memory consumption -/// when building the response. This is a workaround until a way is found to -/// prune the long tail of rare fields. 
-static FIELD_LIST_SIZE_LIMIT: LazyLock = - LazyLock::new(|| quickwit_common::get_from_env("QW_FIELD_LIST_SIZE_LIMIT", 100_000, false)); - const DYNAMIC_FIELD_PREFIX: &str = "_dynamic."; /// Get the list of fields in the given split. @@ -231,9 +221,8 @@ struct ListFieldMerger> { impl> ListFieldMerger { fn new(iterators: impl Iterator) -> Self { - //TODO: sort let cmp_fn: fn(&ListFieldsEntryResponse, &ListFieldsEntryResponse) -> bool = - |a, b| (&a.field_name, a.field_type) <= (&b.field_name, b.field_type); + |a, b| field_order(a, b) == std::cmp::Ordering::Less; let merged = iterators.kmerge_by(cmp_fn); Self { @@ -466,10 +455,10 @@ pub async fn root_list_fields( .take(list_fields_req.start_offset as usize) .count(); - let fields = fields_iter + let fields: Vec = fields_iter .by_ref() .take(list_fields_req.max_fields as usize) - .collect::>(); + .collect(); let remaining = fields_iter.count(); let num_fields = (skipped + fields.len() + remaining) as u64; diff --git a/quickwit/quickwit-serve/src/search_api/rest_handler.rs b/quickwit/quickwit-serve/src/search_api/rest_handler.rs index 3d149143b2d..c663dd1324c 100644 --- a/quickwit/quickwit-serve/src/search_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/search_api/rest_handler.rs @@ -275,13 +275,6 @@ pub struct ListFieldsQueryString { /// The output format. #[serde(default)] pub format: BodyFormat, - /// Specifies how documents are sorted. - #[serde(alias = "sort_by_field")] - #[serde(deserialize_with = "sort_by_mini_dsl")] - #[serde(default)] - #[serde(skip_serializing_if = "SortBy::is_empty")] - #[param(value_type = String)] - pub sort_by: SortBy, } pub fn search_request_from_api_request( @@ -422,7 +415,6 @@ async fn list_fields( }, index_id_patterns, max_fields: request.max_fields, - sort_fields: request.sort_by.sort_fields, start_offset: request.start_offset, }; let result = search_service.root_list_fields(req).await;